From 139ea93512aeead8a4aee3910a3de86eb109a838 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 5 Nov 2021 15:52:31 +0100
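Subject: VideoCore: implement channels on gpu caches.

Stop routing every nvdrv request through the single global GPU state and
attach state to individual channels instead:

- nvhost_gpu obtains a Tegra::Control::ChannelState from
  GPU::AllocateChannel() at construction, calls GPU::InitChannel() from
  AllocGPFIFOEx2 (returning NvResult::AlreadyAllocated when
  channel_state->initiated is already set) and passes the channel's
  bind_id to every PushGPUEntries() call.
- nvhost_as_gpu creates its own Tegra::MemoryManager (gmmu) and uses it
  for all allocate/map/unmap ioctls instead of
  system.GPU().MemoryManager(). BindChannel assigns that manager to the
  channel_state of the nvhost_gpu device looked up by fd.
- FenceOperation and FenceAction are now taken from
  Tegra::Engines::Puller rather than Tegra::GPU.
- BufferCache derives from
  VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>; its
  constructor no longer takes Maxwell3D, KeplerCompute and MemoryManager
  references, and those objects are reached through pointers
  (maxwell3d->, kepler_compute->, gpu_memory->).

Also pass the missing params.id argument to the LOG_DEBUG call in
nvmap::IocFromId.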
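---
Review notes (text in this area is not part of the commit message):

- src/video_core/buffer_cache/buffer_cache.h, BufferCache::UpdateVertexBuffer:
  the hunk removes the block that capped address_size with
  GetSubmappedRange() when the reported size was >= 64_MiB. The subject
  does not mention this; please confirm the removal is intentional and
  not a rebase artifact.
- src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp, BindChannel:
  the result of module.GetDevice<nvhost_gpu>(params.fd) is dereferenced
  without a check. Please confirm GetDevice cannot return null when the
  guest passes an fd that is not an open nvhost_gpu device.
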
 .../hle/service/nvdrv/devices/nvhost_as_gpu.cpp    |  34 +-
 src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h |  14 +-
 src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp  |  34 +-
 src/core/hle/service/nvdrv/devices/nvhost_gpu.h    |   9 +
 src/core/hle/service/nvdrv/devices/nvmap.cpp       |   2 +-
 src/core/hle/service/nvdrv/nvdrv.cpp               |   2 +-
 src/video_core/CMakeLists.txt                      |   8 +
 src/video_core/buffer_cache/buffer_cache.h         | 103 ++---
 src/video_core/control/channel_state.cpp           |  44 ++
 src/video_core/control/channel_state.h             |  69 +++
 src/video_core/control/channel_state_cache.cpp     |   5 +
 src/video_core/control/channel_state_cache.h       |  68 +++
 src/video_core/control/channel_state_cache.inc     |  64 +++
 src/video_core/control/scheduler.cpp               |  31 ++
 src/video_core/control/scheduler.h                 |  38 ++
 src/video_core/dma_pusher.cpp                      |  23 +-
 src/video_core/dma_pusher.h                        |  13 +-
 src/video_core/engines/puller.cpp                  | 297 +++++++++++++
 src/video_core/engines/puller.h                    | 179 ++++++++
 src/video_core/fence_manager.h                     |  28 +-
 src/video_core/gpu.cpp                             | 468 ++++-----------------
 src/video_core/gpu.h                               |  55 +--
 src/video_core/gpu_thread.cpp                      |  14 +-
 src/video_core/gpu_thread.h                        |  12 +-
 src/video_core/memory_manager.cpp                  |   5 -
 src/video_core/query_cache.h                       |  18 +-
 src/video_core/rasterizer_interface.h              |   9 +
 .../renderer_opengl/gl_fence_manager.cpp           |   4 +-
 src/video_core/renderer_opengl/gl_fence_manager.h  |   4 +-
 src/video_core/renderer_opengl/gl_query_cache.cpp  |   5 +-
 src/video_core/renderer_opengl/gl_query_cache.h    |   3 +-
 src/video_core/renderer_opengl/gl_rasterizer.cpp   |  14 +-
 src/video_core/renderer_opengl/gl_shader_cache.cpp |  39 +-
 src/video_core/renderer_opengl/gl_shader_cache.h   |   9 +-
 src/video_core/renderer_vulkan/renderer_vulkan.cpp |  17 +-
 .../renderer_vulkan/vk_fence_manager.cpp           |   4 +-
 src/video_core/renderer_vulkan/vk_fence_manager.h  |   4 +-
 .../renderer_vulkan/vk_pipeline_cache.cpp          |  28 +-
 src/video_core/renderer_vulkan/vk_pipeline_cache.h |   6 +-
 src/video_core/renderer_vulkan/vk_query_cache.cpp  |   7 +-
 src/video_core/renderer_vulkan/vk_query_cache.h    |   5 +-
 src/video_core/renderer_vulkan/vk_rasterizer.cpp   |  87 ++--
 src/video_core/renderer_vulkan/vk_rasterizer.h     |  20 +-
 .../renderer_vulkan/vk_state_tracker.cpp           |  13 +-
 src/video_core/renderer_vulkan/vk_state_tracker.h  |  22 +-
 src/video_core/shader_cache.cpp                    |  33 +-
 src/video_core/shader_cache.h                      |  15 +-
 src/video_core/texture_cache/image_base.h          |   3 +
 src/video_core/texture_cache/texture_cache.h       | 209 +++++----
 src/video_core/texture_cache/texture_cache_base.h  |  73 +++-
 50 files changed, 1461 insertions(+), 809 deletions(-)
 create mode 100644 src/video_core/control/channel_state.cpp
 create mode 100644 src/video_core/control/channel_state.h
 create mode 100644 src/video_core/control/channel_state_cache.cpp
 create mode 100644 src/video_core/control/channel_state_cache.h
 create mode 100644 src/video_core/control/channel_state_cache.inc
 create mode 100644 src/video_core/control/scheduler.cpp
 create mode 100644 src/video_core/control/scheduler.h
 create mode 100644 src/video_core/engines/puller.cpp
 create mode 100644 src/video_core/engines/puller.h

diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index b1c683511..9946ce624 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -10,13 +10,17 @@
 #include "core/hle/service/nvdrv/core/container.h"
 #include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
+#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
+#include "core/hle/service/nvdrv/nvdrv.h"
+#include "video_core/control/channel_state.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 
 namespace Service::Nvidia::Devices {
 
-nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, NvCore::Container& core)
-    : nvdevice{system_}, container{core}, nvmap{core.GetNvMapFile()} {}
+nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Container& core)
+    : nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()},
+      gmmu{std::make_shared<Tegra::MemoryManager>(system)} {}
 nvhost_as_gpu::~nvhost_as_gpu() = default;
 
 NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -102,9 +106,9 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<
 
     const auto size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)};
     if ((params.flags & AddressSpaceFlags::FixedOffset) != AddressSpaceFlags::None) {
-        params.offset = *system.GPU().MemoryManager().AllocateFixed(params.offset, size);
+        params.offset = *(gmmu->AllocateFixed(params.offset, size));
     } else {
-        params.offset = system.GPU().MemoryManager().Allocate(size, params.align);
+        params.offset = gmmu->Allocate(size, params.align);
     }
 
     auto result = NvResult::Success;
@@ -124,8 +128,7 @@ NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>&
     LOG_DEBUG(Service_NVDRV, "called, offset={:X}, pages={:X}, page_size={:X}", params.offset,
               params.pages, params.page_size);
 
-    system.GPU().MemoryManager().Unmap(params.offset,
-                                       static_cast<std::size_t>(params.pages) * params.page_size);
+    gmmu->Unmap(params.offset, static_cast<std::size_t>(params.pages) * params.page_size);
 
     std::memcpy(output.data(), &params, output.size());
     return NvResult::Success;
@@ -148,7 +151,7 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
             // If nvmap handle is null, we should unmap instead.
             const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10};
             const auto size{static_cast<u64>(entry.pages) << 0x10};
-            system.GPU().MemoryManager().Unmap(offset, size);
+            gmmu->Unmap(offset, size);
             continue;
         }
 
@@ -162,8 +165,7 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
         const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10};
         const auto size{static_cast<u64>(entry.pages) << 0x10};
         const auto map_offset{static_cast<u64>(entry.map_offset) << 0x10};
-        const auto addr{
-            system.GPU().MemoryManager().Map(object->address + map_offset, offset, size)};
+        const auto addr{gmmu->Map(object->address + map_offset, offset, size)};
 
         if (!addr) {
             LOG_CRITICAL(Service_NVDRV, "map returned an invalid address!");
@@ -186,13 +188,12 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
               params.flags, params.nvmap_handle, params.buffer_offset, params.mapping_size,
               params.offset);
 
-    auto& gpu = system.GPU();
     if ((params.flags & AddressSpaceFlags::Remap) != AddressSpaceFlags::None) {
         if (const auto buffer_map{FindBufferMap(params.offset)}; buffer_map) {
             const auto cpu_addr{static_cast<VAddr>(buffer_map->CpuAddr() + params.buffer_offset)};
             const auto gpu_addr{static_cast<GPUVAddr>(params.offset + params.buffer_offset)};
 
-            if (!gpu.MemoryManager().Map(cpu_addr, gpu_addr, params.mapping_size)) {
+            if (!gmmu->Map(cpu_addr, gpu_addr, params.mapping_size)) {
                 LOG_CRITICAL(Service_NVDRV,
                              "remap failed, flags={:X}, nvmap_handle={:X}, buffer_offset={}, "
                              "mapping_size = {}, offset={}",
@@ -238,9 +239,9 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
 
     const bool is_alloc{(params.flags & AddressSpaceFlags::FixedOffset) == AddressSpaceFlags::None};
     if (is_alloc) {
-        params.offset = gpu.MemoryManager().MapAllocate(physical_address, size, page_size);
+        params.offset = gmmu->MapAllocate(physical_address, size, page_size);
     } else {
-        params.offset = gpu.MemoryManager().Map(physical_address, params.offset, size);
+        params.offset = gmmu->Map(physical_address, params.offset, size);
     }
 
     auto result = NvResult::Success;
@@ -262,7 +263,7 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
     LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset);
 
     if (const auto size{RemoveBufferMap(params.offset)}; size) {
-        system.GPU().MemoryManager().Unmap(params.offset, *size);
+        gmmu->Unmap(params.offset, *size);
     } else {
         LOG_ERROR(Service_NVDRV, "invalid offset=0x{:X}", params.offset);
     }
@@ -274,9 +275,10 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
 NvResult nvhost_as_gpu::BindChannel(const std::vector<u8>& input, std::vector<u8>& output) {
     IoctlBindChannel params{};
     std::memcpy(&params, input.data(), input.size());
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}", params.fd);
+    LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd);
 
-    channel = params.fd;
+    auto gpu_channel_device = module.GetDevice<nvhost_gpu>(params.fd);
+    gpu_channel_device->channel_state->memory_manager = gmmu;
     return NvResult::Success;
 }
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index 67d2f1e87..4ecae3caf 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -13,6 +13,14 @@
 #include "common/swap.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"
 
+namespace Tegra {
+class MemoryManager;
+} // namespace Tegra
+
+namespace Service::Nvidia {
+class Module;
+}
+
 namespace Service::Nvidia::NvCore {
 class Container;
 class NvMap;
@@ -34,7 +42,7 @@ DECLARE_ENUM_FLAG_OPERATORS(AddressSpaceFlags);
 
 class nvhost_as_gpu final : public nvdevice {
 public:
-    explicit nvhost_as_gpu(Core::System& system_, NvCore::Container& core);
+    explicit nvhost_as_gpu(Core::System& system_, Module& module, NvCore::Container& core);
     ~nvhost_as_gpu() override;
 
     NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -187,9 +195,13 @@ private:
     void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated);
     std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr);
 
+    Module& module;
+
     NvCore::Container& container;
     NvCore::NvMap& nvmap;
 
+    std::shared_ptr<Tegra::MemoryManager> gmmu;
+
     // This is expected to be ordered, therefore we must use a map, not unordered_map
     std::map<GPUVAddr, BufferMap> buffer_mappings;
 };
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index cb54ee5a4..38d45cb79 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -11,12 +11,14 @@
 #include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
 #include "core/memory.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/engines/puller.h"
 #include "video_core/gpu.h"
 
 namespace Service::Nvidia::Devices {
 namespace {
-Tegra::CommandHeader BuildFenceAction(Tegra::GPU::FenceOperation op, u32 syncpoint_id) {
-    Tegra::GPU::FenceAction result{};
+Tegra::CommandHeader BuildFenceAction(Tegra::Engines::Puller::FenceOperation op, u32 syncpoint_id) {
+    Tegra::Engines::Puller::FenceAction result{};
     result.op.Assign(op);
     result.syncpoint_id.Assign(syncpoint_id);
     return {result.raw};
@@ -26,7 +28,8 @@ Tegra::CommandHeader BuildFenceAction(Tegra::GPU::FenceOperation op, u32 syncpoi
 nvhost_gpu::nvhost_gpu(Core::System& system_, EventInterface& events_interface_,
                        NvCore::Container& core_)
     : nvdevice{system_}, events_interface{events_interface_}, core{core_},
-      syncpoint_manager{core_.GetSyncpointManager()}, nvmap{core.GetNvMapFile()} {
+      syncpoint_manager{core_.GetSyncpointManager()}, nvmap{core.GetNvMapFile()},
+      channel_state{system.GPU().AllocateChannel()} {
     channel_fence.id = syncpoint_manager.AllocateSyncpoint();
     channel_fence.value = system_.GPU().GetSyncpointValue(channel_fence.id);
     sm_exception_breakpoint_int_report_event =
@@ -180,6 +183,12 @@ NvResult nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8
                 params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
                 params.unk3);
 
+    if (channel_state->initiated) {
+        LOG_CRITICAL(Service_NVDRV, "Already allocated!");
+        return NvResult::AlreadyAllocated;
+    }
+
+    system.GPU().InitChannel(*channel_state);
     channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
 
     params.fence_out = channel_fence;
@@ -206,7 +215,7 @@ static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) {
         {fence.value},
         Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
                                   Tegra::SubmissionMode::Increasing),
-        BuildFenceAction(Tegra::GPU::FenceOperation::Acquire, fence.id),
+        BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Acquire, fence.id),
     };
 }
 
@@ -220,7 +229,8 @@ static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence
     for (u32 count = 0; count < add_increment; ++count) {
         result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
                                                       Tegra::SubmissionMode::Increasing));
-        result.emplace_back(BuildFenceAction(Tegra::GPU::FenceOperation::Increment, fence.id));
+        result.emplace_back(
+            BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id));
     }
 
     return result;
@@ -247,11 +257,13 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>
 
     auto& gpu = system.GPU();
 
+    const auto bind_id = channel_state->bind_id;
+
     params.fence_out.id = channel_fence.id;
 
     if (params.flags.add_wait.Value() &&
         !syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) {
-        gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
+        gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
     }
 
     if (params.flags.add_increment.Value() || params.flags.increment.Value()) {
@@ -262,15 +274,15 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>
         params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id);
     }
 
-    gpu.PushGPUEntries(std::move(entries));
+    gpu.PushGPUEntries(bind_id, std::move(entries));
 
     if (params.flags.add_increment.Value()) {
         if (params.flags.suppress_wfi) {
-            gpu.PushGPUEntries(Tegra::CommandList{
-                BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())});
+            gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildIncrementCommandList(
+                                            params.fence_out, params.AddIncrementValue())});
         } else {
-            gpu.PushGPUEntries(Tegra::CommandList{
-                BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())});
+            gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildIncrementWithWfiCommandList(
+                                            params.fence_out, params.AddIncrementValue())});
         }
     }
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index 440c0c42d..3a65ed06d 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -13,6 +13,12 @@
 #include "core/hle/service/nvdrv/nvdata.h"
 #include "video_core/dma_pusher.h"
 
+namespace Tegra {
+namespace Control {
+struct ChannelState;
+}
+} // namespace Tegra
+
 namespace Service::Nvidia {
 
 namespace NvCore {
@@ -26,6 +32,7 @@ class EventInterface;
 
 namespace Service::Nvidia::Devices {
 
+class nvhost_as_gpu;
 class nvmap;
 class nvhost_gpu final : public nvdevice {
 public:
@@ -46,6 +53,7 @@ public:
     Kernel::KEvent* QueryEvent(u32 event_id) override;
 
 private:
+    friend class nvhost_as_gpu;
     enum class CtxObjects : u32_le {
         Ctx2D = 0x902D,
         Ctx3D = 0xB197,
@@ -204,6 +212,7 @@ private:
     NvCore::Container& core;
     NvCore::SyncpointManager& syncpoint_manager;
     NvCore::NvMap& nvmap;
+    std::shared_ptr<Tegra::Control::ChannelState> channel_state;
     NvFence channel_fence;
 
     // Events
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index 57f58055d..279997e81 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -168,7 +168,7 @@ NvResult nvmap::IocFromId(const std::vector<u8>& input, std::vector<u8>& output)
     IocFromIdParams params;
     std::memcpy(&params, input.data(), sizeof(params));
 
-    LOG_DEBUG(Service_NVDRV, "called, id:{}");
+    LOG_DEBUG(Service_NVDRV, "called, id:{}", params.id);
 
     // Handles and IDs are always the same value in nvmap however IDs can be used globally given the
     // right permissions.
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 208de0b75..b39a4c6db 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -74,7 +74,7 @@ Module::Module(Core::System& system)
     : service_context{system, "nvdrv"}, events_interface{*this}, container{system.GPU()} {
     builders["/dev/nvhost-as-gpu"] = [this, &system](DeviceFD fd) {
         std::shared_ptr<Devices::nvdevice> device =
-            std::make_shared<Devices::nvhost_as_gpu>(system, container);
+            std::make_shared<Devices::nvhost_as_gpu>(system, *this, container);
         return open_files.emplace(fd, device).first;
     };
     builders["/dev/nvhost-gpu"] = [this, &system](DeviceFD fd) {
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 5b3808351..e216c51a2 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -35,6 +35,12 @@ add_library(video_core STATIC
     command_classes/vic.h
     compatible_formats.cpp
     compatible_formats.h
+    control/channel_state.cpp
+    control/channel_state.h
+    control/channel_state_cache.cpp
+    control/channel_state_cache.h
+    control/scheduler.cpp
+    control/scheduler.h
     delayed_destruction_ring.h
     dirty_flags.cpp
     dirty_flags.h
@@ -54,6 +60,8 @@ add_library(video_core STATIC
     engines/maxwell_3d.h
     engines/maxwell_dma.cpp
     engines/maxwell_dma.h
+    engines/puller.cpp
+    engines/puller.h
     framebuffer_config.h
     macro/macro.cpp
     macro/macro.h
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index f015dae56..6b6764d72 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -5,7 +5,6 @@
 
 #include <algorithm>
 #include <array>
-#include <deque>
 #include <memory>
 #include <mutex>
 #include <numeric>
@@ -23,6 +22,7 @@
 #include "common/settings.h"
 #include "core/memory.h"
 #include "video_core/buffer_cache/buffer_base.h"
+#include "video_core/control/channel_state_cache.h"
 #include "video_core/delayed_destruction_ring.h"
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/kepler_compute.h"
@@ -56,7 +56,7 @@ using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFE
 using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
 
 template <typename P>
-class BufferCache {
+class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
 
     // Page size for caching purposes.
     // This is unrelated to the CPU page size and it can be changed as it seems optimal.
@@ -116,10 +116,7 @@ public:
     static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
 
     explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
-                         Tegra::Engines::Maxwell3D& maxwell3d_,
-                         Tegra::Engines::KeplerCompute& kepler_compute_,
-                         Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                         Runtime& runtime_);
+                         Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
 
     void TickFrame();
 
@@ -367,9 +364,6 @@ private:
     void ClearDownload(IntervalType subtract_interval);
 
     VideoCore::RasterizerInterface& rasterizer;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;
-    Tegra::MemoryManager& gpu_memory;
     Core::Memory::Memory& cpu_memory;
 
     SlotVector<Buffer> slot_buffers;
@@ -444,12 +438,8 @@ private:
 
 template <class P>
 BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
-                            Tegra::Engines::Maxwell3D& maxwell3d_,
-                            Tegra::Engines::KeplerCompute& kepler_compute_,
-                            Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                            Runtime& runtime_)
-    : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
-      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
+                            Core::Memory::Memory& cpu_memory_, Runtime& runtime_)
+    : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_} {
     // Ensure the first slot is used for the null buffer
     void(slot_buffers.insert(runtime, NullBufferParams{}));
     common_ranges.clear();
@@ -552,8 +542,8 @@ void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
 
 template <class P>
 bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
-    const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address);
-    const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address);
+    const std::optional<VAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address);
+    const std::optional<VAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address);
     if (!cpu_src_address || !cpu_dest_address) {
         return false;
     }
@@ -611,7 +601,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
 
 template <class P>
 bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
-    const std::optional<VAddr> cpu_dst_address = gpu_memory.GpuToCpuAddress(dst_address);
+    const std::optional<VAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address);
     if (!cpu_dst_address) {
         return false;
     }
@@ -635,7 +625,7 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
 template <class P>
 void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
                                                u32 size) {
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     const Binding binding{
         .cpu_addr = *cpu_addr,
         .size = size,
@@ -673,7 +663,7 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
     if (is_indexed) {
         BindHostIndexBuffer();
     } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
-        const auto& regs = maxwell3d.regs;
+        const auto& regs = maxwell3d->regs;
         if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
             runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count);
         }
@@ -733,7 +723,7 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index,
     enabled_storage_buffers[stage] |= 1U << ssbo_index;
     written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
 
-    const auto& cbufs = maxwell3d.state.shader_stages[stage];
+    const auto& cbufs = maxwell3d->state.shader_stages[stage];
     const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
     storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr);
 }
@@ -770,7 +760,7 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
     enabled_compute_storage_buffers |= 1U << ssbo_index;
     written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
 
-    const auto& launch_desc = kepler_compute.launch_description;
+    const auto& launch_desc = kepler_compute->launch_description;
     ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
 
     const auto& cbufs = launch_desc.const_buffer_config;
@@ -991,19 +981,19 @@ void BufferCache<P>::BindHostIndexBuffer() {
     const u32 size = index_buffer.size;
     SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
     if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
-        const u32 new_offset = offset + maxwell3d.regs.index_array.first *
-                                            maxwell3d.regs.index_array.FormatSizeInBytes();
+        const u32 new_offset = offset + maxwell3d->regs.index_array.first *
+                                            maxwell3d->regs.index_array.FormatSizeInBytes();
         runtime.BindIndexBuffer(buffer, new_offset, size);
     } else {
-        runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format,
-                                maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count,
-                                buffer, offset, size);
+        runtime.BindIndexBuffer(maxwell3d->regs.draw.topology, maxwell3d->regs.index_array.format,
+                                maxwell3d->regs.index_array.first,
+                                maxwell3d->regs.index_array.count, buffer, offset, size);
     }
 }
 
 template <class P>
 void BufferCache<P>::BindHostVertexBuffers() {
-    auto& flags = maxwell3d.dirty.flags;
+    auto& flags = maxwell3d->dirty.flags;
     for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
         const Binding& binding = vertex_buffers[index];
         Buffer& buffer = slot_buffers[binding.buffer_id];
@@ -1014,7 +1004,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
         }
         flags[Dirty::VertexBuffer0 + index] = false;
 
-        const u32 stride = maxwell3d.regs.vertex_array[index].stride;
+        const u32 stride = maxwell3d->regs.vertex_array[index].stride;
         const u32 offset = buffer.Offset(binding.cpu_addr);
         runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride);
     }
@@ -1154,7 +1144,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
 
 template <class P>
 void BufferCache<P>::BindHostTransformFeedbackBuffers() {
-    if (maxwell3d.regs.tfb_enabled == 0) {
+    if (maxwell3d->regs.tfb_enabled == 0) {
         return;
     }
     for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
@@ -1262,8 +1252,8 @@ template <class P>
 void BufferCache<P>::UpdateIndexBuffer() {
     // We have to check for the dirty flags and index count
     // The index count is currently changed without updating the dirty flags
-    const auto& index_array = maxwell3d.regs.index_array;
-    auto& flags = maxwell3d.dirty.flags;
+    const auto& index_array = maxwell3d->regs.index_array;
+    auto& flags = maxwell3d->dirty.flags;
     if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
         return;
     }
@@ -1272,7 +1262,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
 
     const GPUVAddr gpu_addr_begin = index_array.StartAddress();
     const GPUVAddr gpu_addr_end = index_array.EndAddress();
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
     const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
     const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes();
     const u32 size = std::min(address_size, draw_size);
@@ -1289,8 +1279,8 @@ void BufferCache<P>::UpdateIndexBuffer() {
 
 template <class P>
 void BufferCache<P>::UpdateVertexBuffers() {
-    auto& flags = maxwell3d.dirty.flags;
-    if (!maxwell3d.dirty.flags[Dirty::VertexBuffers]) {
+    auto& flags = maxwell3d->dirty.flags;
+    if (!maxwell3d->dirty.flags[Dirty::VertexBuffers]) {
         return;
     }
     flags[Dirty::VertexBuffers] = false;
@@ -1302,28 +1292,15 @@ void BufferCache<P>::UpdateVertexBuffers() {
 
 template <class P>
 void BufferCache<P>::UpdateVertexBuffer(u32 index) {
-    if (!maxwell3d.dirty.flags[Dirty::VertexBuffer0 + index]) {
+    if (!maxwell3d->dirty.flags[Dirty::VertexBuffer0 + index]) {
         return;
     }
-    const auto& array = maxwell3d.regs.vertex_array[index];
-    const auto& limit = maxwell3d.regs.vertex_array_limit[index];
+    const auto& array = maxwell3d->regs.vertex_array[index];
+    const auto& limit = maxwell3d->regs.vertex_array_limit[index];
     const GPUVAddr gpu_addr_begin = array.StartAddress();
     const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1;
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
-    u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
-    if (address_size >= 64_MiB) {
-        // Reported vertex buffer size is very large, cap to mapped buffer size
-        GPUVAddr submapped_addr_end = gpu_addr_begin;
-
-        const auto ranges{gpu_memory.GetSubmappedRange(gpu_addr_begin, address_size)};
-        if (ranges.size() > 0) {
-            const auto& [addr, size] = *ranges.begin();
-            submapped_addr_end = addr + size;
-        }
-
-        address_size =
-            std::min(address_size, static_cast<u32>(submapped_addr_end - gpu_addr_begin));
-    }
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
+    const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
     const u32 size = address_size; // TODO: Analyze stride and number of vertices
     if (array.enable == 0 || size == 0 || !cpu_addr) {
         vertex_buffers[index] = NULL_BINDING;
@@ -1382,7 +1359,7 @@ void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
 
 template <class P>
 void BufferCache<P>::UpdateTransformFeedbackBuffers() {
-    if (maxwell3d.regs.tfb_enabled == 0) {
+    if (maxwell3d->regs.tfb_enabled == 0) {
         return;
     }
     for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
@@ -1392,10 +1369,10 @@ void BufferCache<P>::UpdateTransformFeedbackBuffers() {
 
 template <class P>
 void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
-    const auto& binding = maxwell3d.regs.tfb_bindings[index];
+    const auto& binding = maxwell3d->regs.tfb_bindings[index];
     const GPUVAddr gpu_addr = binding.Address() + binding.buffer_offset;
     const u32 size = binding.buffer_size;
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     if (binding.buffer_enable == 0 || size == 0 || !cpu_addr) {
         transform_feedback_buffers[index] = NULL_BINDING;
         return;
@@ -1414,10 +1391,10 @@ void BufferCache<P>::UpdateComputeUniformBuffers() {
     ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
         Binding& binding = compute_uniform_buffers[index];
         binding = NULL_BINDING;
-        const auto& launch_desc = kepler_compute.launch_description;
+        const auto& launch_desc = kepler_compute->launch_description;
         if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
             const auto& cbuf = launch_desc.const_buffer_config[index];
-            const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(cbuf.Address());
+            const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(cbuf.Address());
             if (cpu_addr) {
                 binding.cpu_addr = *cpu_addr;
                 binding.size = cbuf.size;
@@ -1831,7 +1808,7 @@ void BufferCache<P>::NotifyBufferDeletion() {
         dirty_uniform_buffers.fill(~u32{0});
         uniform_buffer_binding_sizes.fill({});
     }
-    auto& flags = maxwell3d.dirty.flags;
+    auto& flags = maxwell3d->dirty.flags;
     flags[Dirty::IndexBuffer] = true;
     flags[Dirty::VertexBuffers] = true;
     for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
@@ -1842,9 +1819,9 @@ void BufferCache<P>::NotifyBufferDeletion() {
 
 template <class P>
 typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr) const {
-    const GPUVAddr gpu_addr = gpu_memory.Read<u64>(ssbo_addr);
-    const u32 size = gpu_memory.Read<u32>(ssbo_addr + 8);
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
+    const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     if (!cpu_addr || size == 0) {
         return NULL_BINDING;
     }
@@ -1859,7 +1836,7 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
 template <class P>
 typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(
     GPUVAddr gpu_addr, u32 size, PixelFormat format) {
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     TextureBufferBinding binding;
     if (!cpu_addr || size == 0) {
         binding.cpu_addr = 0;
diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp
new file mode 100644
index 000000000..67803fe94
--- /dev/null
+++ b/src/video_core/control/channel_state.cpp
@@ -0,0 +1,44 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/dma_pusher.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/kepler_memory.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/maxwell_dma.h"
+#include "video_core/engines/puller.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra::Control {
+
+/// Stores the channel id and marks the channel as not yet initialized.
+/// The engine members are left empty here; Init() is what creates them and
+/// sets `initiated` to true.
+ChannelState::ChannelState(s32 bind_id_) : bind_id{bind_id_}, initiated{false} {}
+
+void ChannelState::Init(Core::System& system, GPU& gpu) { // creates the pusher and the engines
+    ASSERT(memory_manager); // must be assigned beforehand: it is dereferenced below
+    dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this);
+    maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager);
+    fermi_2d = std::make_unique<Engines::Fermi2D>();
+    kepler_compute = std::make_unique<Engines::KeplerCompute>(system, *memory_manager);
+    maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
+    kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
+    initiated = true; // records that the pusher and the engines now exist
+}
+
+void ChannelState::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
+    dma_pusher->BindRasterizer(rasterizer); // pusher and engines are only created by Init()
+    memory_manager->BindRasterizer(rasterizer);
+    maxwell_3d->BindRasterizer(rasterizer);
+    fermi_2d->BindRasterizer(rasterizer);
+    kepler_memory->BindRasterizer(rasterizer);
+    kepler_compute->BindRasterizer(rasterizer);
+    maxwell_dma->BindRasterizer(rasterizer);
+}
+
+} // namespace Tegra::Control
diff --git a/src/video_core/control/channel_state.h b/src/video_core/control/channel_state.h
new file mode 100644
index 000000000..82808a6b8
--- /dev/null
+++ b/src/video_core/control/channel_state.h
@@ -0,0 +1,69 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+
+#include "common/common_types.h"
+
+namespace Core {
+class System;
+}
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Tegra {
+
+class GPU;
+
+namespace Engines {
+class Puller;
+class Fermi2D;
+class Maxwell3D;
+class MaxwellDMA;
+class KeplerCompute;
+class KeplerMemory;
+} // namespace Engines
+
+class MemoryManager;
+class DmaPusher;
+
+namespace Control {
+
+/// Per-channel GPU state: the engines, the DMA pusher and the memory manager of one channel.
+struct ChannelState {
+    explicit ChannelState(s32 bind_id); // explicit: an s32 must not silently become a channel
+    ChannelState(const ChannelState& state) = delete;
+    ChannelState& operator=(const ChannelState&) = delete;
+    ChannelState(ChannelState&& other) noexcept = default;
+    ChannelState& operator=(ChannelState&& other) noexcept = default;
+
+    /// Creates the DMA pusher and the engines; memory_manager must be set beforehand.
+    void Init(Core::System& system, GPU& gpu);
+    /// Forwards the rasterizer to the pusher, the memory manager and every engine.
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
+    s32 bind_id = -1;
+    /// 3D engine
+    std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
+    /// 2D engine
+    std::unique_ptr<Engines::Fermi2D> fermi_2d;
+    /// Compute engine
+    std::unique_ptr<Engines::KeplerCompute> kepler_compute;
+    /// DMA engine
+    std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
+    /// Inline memory engine
+    std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+    std::shared_ptr<MemoryManager> memory_manager; // held with shared ownership
+    /// Command pusher, created by Init()
+    std::unique_ptr<DmaPusher> dma_pusher;
+    bool initiated{}; // set to true by Init()
+};
+
+} // namespace Control
+
+} // namespace Tegra
diff --git a/src/video_core/control/channel_state_cache.cpp b/src/video_core/control/channel_state_cache.cpp
new file mode 100644
index 000000000..f72a97b2f
--- /dev/null
+++ b/src/video_core/control/channel_state_cache.cpp
@@ -0,0 +1,5 @@
+#include "video_core/control/channel_state_cache.inc"
+
+namespace VideoCommon {
+template class VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>; // explicit instantiation
+} // namespace VideoCommon
diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h
new file mode 100644
index 000000000..c8298c003
--- /dev/null
+++ b/src/video_core/control/channel_state_cache.h
@@ -0,0 +1,68 @@
+#pragma once
+
+#include <deque>
+#include <limits>
+#include <unordered_map>
+
+#include "common/common_types.h"
+
+namespace Tegra {
+
+namespace Engines {
+class Maxwell3D;
+class KeplerCompute;
+} // namespace Engines
+
+class MemoryManager;
+
+namespace Control {
+struct ChannelState;
+}
+
+} // namespace Tegra
+
+namespace VideoCommon {
+
+/// References to the engines and GPU memory manager of one channel.
+class ChannelInfo {
+public:
+    ChannelInfo(Tegra::Control::ChannelState& state);
+    ChannelInfo(const ChannelInfo& state) = delete;
+    ChannelInfo& operator=(const ChannelInfo&) = delete;
+    ChannelInfo(ChannelInfo&& other) = default;
+    ChannelInfo& operator=(ChannelInfo&& other) = delete; // references cannot be reseated
+
+    Tegra::Engines::Maxwell3D& maxwell3d;
+    Tegra::Engines::KeplerCompute& kepler_compute;
+    Tegra::MemoryManager& gpu_memory;
+};
+
+/// Keeps one P per created channel and caches pointers to the bound channel's engines.
+template <class P>
+class ChannelSetupCaches {
+public:
+    /// Create channel state. The channel's bind_id must be non-negative and not yet in use.
+    void CreateChannel(Tegra::Control::ChannelState& channel);
+
+    /// Bind a channel for execution.
+    void BindToChannel(s32 id);
+
+    /// Erase channel's state.
+    void EraseChannel(s32 id);
+
+protected:
+    /// Value of current_channel_id while no channel is bound.
+    static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
+
+    std::deque<P> channel_storage;
+    std::deque<size_t> free_channel_ids;
+    std::unordered_map<s32, size_t> channel_map;
+
+    P* channel_state{};
+    size_t current_channel_id{UNSET_CHANNEL};
+    Tegra::Engines::Maxwell3D* maxwell3d{};
+    Tegra::Engines::KeplerCompute* kepler_compute{};
+    Tegra::MemoryManager* gpu_memory{};
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/control/channel_state_cache.inc b/src/video_core/control/channel_state_cache.inc
new file mode 100644
index 000000000..3eb73af9f
--- /dev/null
+++ b/src/video_core/control/channel_state_cache.inc
@@ -0,0 +1,64 @@
+#include "video_core/control/channel_state.h"
+#include "video_core/control/channel_state_cache.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+
+namespace VideoCommon {
+
+ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& channel_state)
+    : maxwell3d{*channel_state.maxwell_3d}, kepler_compute{*channel_state.kepler_compute},
+      gpu_memory{*channel_state.memory_manager} {} // all three pointers are dereferenced here
+
+/// Create channel state, reusing the storage slot of an erased channel when one is free.
+template <class P>
+void ChannelSetupCaches<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
+    ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0);
+    size_t new_id = channel_storage.size();
+    if (free_channel_ids.empty()) {
+        channel_storage.emplace_back(channel);
+    } else {
+        new_id = free_channel_ids.front();
+        free_channel_ids.pop_front();
+        // The slot stores a P, so rebuild a complete P there, not only its ChannelInfo base.
+        new (&channel_storage[new_id]) P(channel);
+    }
+    channel_map.emplace(channel.bind_id, new_id);
+    if (current_channel_id != UNSET_CHANNEL) {
+        channel_state = &channel_storage[current_channel_id];
+    }
+}
+
+/// Bind a channel for execution: caches pointers to its engines and GPU memory manager.
+template <class P>
+void ChannelSetupCaches<P>::BindToChannel(s32 id) {
+    auto it = channel_map.find(id);
+    ASSERT(it != channel_map.end() && id >= 0); // the id must have been created beforehand
+    current_channel_id = it->second;
+    channel_state = &channel_storage[current_channel_id];
+    maxwell3d = &channel_state->maxwell3d;
+    kepler_compute = &channel_state->kepler_compute;
+    gpu_memory = &channel_state->gpu_memory;
+}
+
+/// Erase channel's channel_state: its slot goes to the free list that CreateChannel reuses.
+template <class P>
+void ChannelSetupCaches<P>::EraseChannel(s32 id) {
+    const auto it = channel_map.find(id);
+    ASSERT(it != channel_map.end() && id >= 0);
+    const auto this_id = it->second;
+    free_channel_ids.push_back(this_id); // the slot index becomes available again
+    channel_map.erase(it);
+    if (this_id == current_channel_id) { // the bound channel was erased: unbind everything
+        current_channel_id = UNSET_CHANNEL;
+        channel_state = nullptr;
+        maxwell3d = nullptr;
+        kepler_compute = nullptr;
+        gpu_memory = nullptr;
+    } else if (current_channel_id != UNSET_CHANNEL) { // another channel stays bound
+        channel_state = &channel_storage[current_channel_id];
+    }
+}
+
+
+} // namespace VideoCommon
diff --git a/src/video_core/control/scheduler.cpp b/src/video_core/control/scheduler.cpp
new file mode 100644
index 000000000..e1abcb188
--- /dev/null
+++ b/src/video_core/control/scheduler.cpp
@@ -0,0 +1,40 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <memory>
+
+#include "common/assert.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/control/scheduler.h"
+#include "video_core/gpu.h"
+
+namespace Tegra::Control {
+Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} {}
+
+Scheduler::~Scheduler() = default;
+
+/// Binds `channel` on the GPU, then queues `entries` on its DMA pusher and dispatches them.
+/// A submission for a channel that was never declared is reported and dropped.
+void Scheduler::Push(s32 channel, CommandList&& entries) {
+    std::unique_lock<std::mutex> lk(scheduling_guard);
+    const auto it = channels.find(channel);
+    if (it == channels.end()) {
+        // Dereferencing end() is undefined behavior, so bail out instead of continuing.
+        ASSERT_MSG(false, "Channel {} was never declared", channel);
+        return;
+    }
+    const auto& channel_state = it->second;
+    gpu.BindChannel(channel_state->bind_id);
+    channel_state->dma_pusher->Push(std::move(entries));
+    channel_state->dma_pusher->DispatchCalls();
+}
+
+/// Registers a channel under its bind_id so that later Push() calls can find it.
+void Scheduler::DeclareChannel(std::shared_ptr<ChannelState> new_channel) {
+    s32 channel = new_channel->bind_id;
+    std::unique_lock<std::mutex> lk(scheduling_guard);
+    channels.emplace(channel, new_channel);
+}
+
+} // namespace Tegra::Control
diff --git a/src/video_core/control/scheduler.h b/src/video_core/control/scheduler.h
new file mode 100644
index 000000000..802e9caff
--- /dev/null
+++ b/src/video_core/control/scheduler.h
@@ -0,0 +1,38 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+
+#include "video_core/dma_pusher.h"
+
+namespace Tegra {
+
+class GPU;
+
+namespace Control {
+
+struct ChannelState;
+
+/// Dispatches command lists to the channels that were declared to it.
+class Scheduler {
+public:
+    explicit Scheduler(GPU& gpu_);
+    ~Scheduler();
+    /// Binds the channel on the GPU and dispatches the entries through its DMA pusher.
+    void Push(s32 channel, CommandList&& entries);
+    void DeclareChannel(std::shared_ptr<ChannelState> new_channel); // keyed by its bind_id
+
+private:
+    std::unordered_map<s32, std::shared_ptr<ChannelState>> channels;
+    std::mutex scheduling_guard; // taken by both Push() and DeclareChannel()
+    GPU& gpu;
+};
+
+} // namespace Control
+
+} // namespace Tegra
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 29b8582ab..b01f04d0c 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -12,7 +12,10 @@
 
 namespace Tegra {
 
-DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_) : gpu{gpu_}, system{system_} {}
+DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_,
+                     Control::ChannelState& channel_state_)
+    : gpu{gpu_}, system{system_}, memory_manager{memory_manager_},
+      puller{gpu_, memory_manager_, *this, channel_state_} {} // the puller is given this pusher
 
 DmaPusher::~DmaPusher() = default;
 
@@ -76,11 +79,11 @@ bool DmaPusher::Step() {
         // Push buffer non-empty, read a word
         command_headers.resize(command_list_header.size);
         if (Settings::IsGPULevelHigh()) {
-            gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
-                                          command_list_header.size * sizeof(u32));
+            memory_manager.ReadBlock(dma_get, command_headers.data(),
+                                     command_list_header.size * sizeof(u32));
         } else {
-            gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
-                                                command_list_header.size * sizeof(u32));
+            memory_manager.ReadBlockUnsafe(dma_get, command_headers.data(),
+                                           command_list_header.size * sizeof(u32));
         }
     }
     for (std::size_t index = 0; index < command_headers.size();) {
@@ -154,7 +157,7 @@ void DmaPusher::SetState(const CommandHeader& command_header) {
 
 void DmaPusher::CallMethod(u32 argument) const {
     if (dma_state.method < non_puller_methods) {
-        gpu.CallMethod(GPU::MethodCall{
+        puller.CallPullerMethod(Engines::Puller::MethodCall{
             dma_state.method,
             argument,
             dma_state.subchannel,
@@ -168,12 +171,16 @@ void DmaPusher::CallMethod(u32 argument) const {
 
 void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
     if (dma_state.method < non_puller_methods) {
-        gpu.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
-                            dma_state.method_count);
+        puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
+                               dma_state.method_count);
     } else {
         subchannels[dma_state.subchannel]->CallMultiMethod(dma_state.method, base_start,
                                                            num_methods, dma_state.method_count);
     }
 }
 
+void DmaPusher::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
+    puller.BindRasterizer(rasterizer); // forwarded unchanged to the embedded puller
+}
+
 } // namespace Tegra
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 872fd146a..fd7c936c4 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -10,6 +10,7 @@
 #include "common/bit_field.h"
 #include "common/common_types.h"
 #include "video_core/engines/engine_interface.h"
+#include "video_core/engines/puller.h"
 
 namespace Core {
 class System;
@@ -17,7 +18,12 @@ class System;
 
 namespace Tegra {
 
+namespace Control {
+struct ChannelState;
+}
+
 class GPU;
+class MemoryManager;
 
 enum class SubmissionMode : u32 {
     IncreasingOld = 0,
@@ -102,7 +108,8 @@ struct CommandList final {
  */
 class DmaPusher final {
 public:
-    explicit DmaPusher(Core::System& system_, GPU& gpu_);
+    explicit DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_,
+                       Control::ChannelState& channel_state_);
     ~DmaPusher();
 
     void Push(CommandList&& entries) {
@@ -115,6 +122,8 @@ public:
         subchannels[subchannel_id] = engine;
     }
 
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
 private:
     static constexpr u32 non_puller_methods = 0x40;
     static constexpr u32 max_subchannels = 8;
@@ -148,6 +157,8 @@ private:
 
     GPU& gpu;
     Core::System& system;
+    MemoryManager& memory_manager;
+    mutable Engines::Puller puller;
 };
 
 } // namespace Tegra
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp
new file mode 100644
index 000000000..37f2ced18
--- /dev/null
+++ b/src/video_core/engines/puller.cpp
@@ -0,0 +1,297 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/settings.h"
+#include "core/core.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/dma_pusher.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/kepler_memory.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/maxwell_dma.h"
+#include "video_core/engines/puller.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace Tegra::Engines {
+
+Puller::Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher_,
+               Control::ChannelState& channel_state_)
+    : gpu{gpu_}, memory_manager{memory_manager_}, dma_pusher{dma_pusher_},
+      channel_state{channel_state_} {} // the rasterizer is supplied later by BindRasterizer()
+
+Puller::~Puller() = default;
+
+void Puller::ProcessBindMethod(const MethodCall& method_call) {
+    // The argument names the engine class to attach to the subchannel of this call.
+    const u32 subchannel = method_call.subchannel;
+    LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, method_call.argument);
+    const auto engine_id = static_cast<EngineID>(method_call.argument);
+    bound_engines[subchannel] = engine_id;
+    switch (engine_id) {
+    case EngineID::FERMI_TWOD_A:
+        dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), subchannel);
+        break;
+    case EngineID::MAXWELL_B:
+        dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), subchannel);
+        break;
+    case EngineID::KEPLER_COMPUTE_B:
+        dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), subchannel);
+        break;
+    case EngineID::MAXWELL_DMA_COPY_A:
+        dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), subchannel);
+        break;
+    case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+        dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), subchannel);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
+    }
+}
+
+void Puller::ProcessFenceActionMethod() {
+    switch (regs.fence_action.op) { // dispatch on the operation held in the fence_action register
+    case Puller::FenceOperation::Acquire:
+        // UNIMPLEMENTED_MSG("Channel Scheduling pending.");
+        // WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
+        break; // acquire is currently a no-op: the wait above is disabled
+    case Puller::FenceOperation::Increment:
+        rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
+    }
+}
+
+void Puller::ProcessWaitForInterruptMethod() {
+    // TODO(bunnei): Not implemented yet; the call is only logged as a stub.
+    LOG_WARNING(HW_GPU, "(STUBBED) called");
+}
+
+/// Handles SemaphoreTrigger: writes a sequence/timestamp block or records a pending acquire.
+void Puller::ProcessSemaphoreTriggerMethod() {
+    // The low four bits of the trigger register select the operation.
+    const auto op = static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & 0xF);
+    if (op == GpuSemaphoreOperation::WriteLong) {
+        struct Block {
+            u32 sequence;
+            u32 zeros = 0;
+            u64 timestamp;
+        };
+
+        Block block{};
+        block.sequence = regs.semaphore_sequence;
+        // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
+        // CoreTiming
+        block.timestamp = gpu.GetTicks();
+        memory_manager.WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block));
+    } else {
+        const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
+        if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
+            (op == GpuSemaphoreOperation::AcquireGequal &&
+             static_cast<s32>(word - regs.semaphore_sequence) >= 0) ||
+            (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
+            // Already satisfied (Gequal is a wrap-around-safe signed compare): nothing to do
+        } else {
+            regs.acquire_source = true;
+            regs.acquire_value = regs.semaphore_sequence;
+            if (op == GpuSemaphoreOperation::AcquireEqual) {
+                regs.acquire_active = true;
+                regs.acquire_mode = false;
+            } else if (op == GpuSemaphoreOperation::AcquireGequal) {
+                regs.acquire_active = true;
+                regs.acquire_mode = true;
+            } else if (op == GpuSemaphoreOperation::AcquireMask) {
+                // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
+                // semaphore_sequence, gives a non-0 result
+                LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
+            } else {
+                LOG_ERROR(HW_GPU, "Invalid semaphore operation");
+            }
+        }
+    }
+}
+
+void Puller::ProcessSemaphoreRelease() { // stores the release value at the semaphore address
+    memory_manager.Write<u32>(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release);
+}
+
+void Puller::ProcessSemaphoreAcquire() {
+    const u32 current = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
+    const auto expected = regs.semaphore_acquire;
+    if (current != expected) { // mismatch: record the pending acquire in the registers
+        regs.acquire_active = true;
+        regs.acquire_value = expected;
+        // TODO(kemathe73) figure out how to do the acquire_timeout
+        regs.acquire_mode = false;
+        regs.acquire_source = false;
+    }
+}
+
+/// Stores the argument in the puller register array, then runs the handler for the method.
+void Puller::CallPullerMethod(const MethodCall& method_call) {
+    regs.reg_array[method_call.method] = method_call.argument;
+    const auto method = static_cast<BufferMethods>(method_call.method);
+
+    switch (method) {
+    case BufferMethods::BindObject: {
+        ProcessBindMethod(method_call);
+        break;
+    }
+    case BufferMethods::Nop:
+    case BufferMethods::SemaphoreAddressHigh:
+    case BufferMethods::SemaphoreAddressLow:
+    case BufferMethods::SemaphoreSequence:
+    case BufferMethods::UnkCacheFlush:
+    case BufferMethods::WrcacheFlush:
+    case BufferMethods::FenceValue:
+        break; // the register write above is all that happens for these methods
+    case BufferMethods::RefCnt:
+        rasterizer->SignalReference();
+        break;
+    case BufferMethods::FenceAction:
+        ProcessFenceActionMethod();
+        break;
+    case BufferMethods::WaitForInterrupt:
+        ProcessWaitForInterruptMethod();
+        break;
+    case BufferMethods::SemaphoreTrigger: {
+        ProcessSemaphoreTriggerMethod();
+        break;
+    }
+    case BufferMethods::NotifyIntr: {
+        // TODO(Kmather73): Research and implement this method.
+        LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
+        break;
+    }
+    case BufferMethods::Unk28: {
+        // TODO(Kmather73): Research and implement this method.
+        LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
+        break;
+    }
+    case BufferMethods::SemaphoreAcquire: {
+        ProcessSemaphoreAcquire();
+        break;
+    }
+    case BufferMethods::SemaphoreRelease: {
+        ProcessSemaphoreRelease();
+        break;
+    }
+    case BufferMethods::Yield: {
+        // TODO(Kmather73): Research and implement this method.
+        LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
+        break;
+    }
+    default:
+        LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
+        break;
+    }
+}
+
+/// Calls a GPU engine method on the engine recorded for the call's subchannel.
+void Puller::CallEngineMethod(const MethodCall& method_call) {
+    const EngineID engine = bound_engines[method_call.subchannel];
+
+    switch (engine) {
+    case EngineID::FERMI_TWOD_A:
+        channel_state.fermi_2d->CallMethod(method_call.method, method_call.argument,
+                                           method_call.IsLastCall());
+        break;
+    case EngineID::MAXWELL_B:
+        channel_state.maxwell_3d->CallMethod(method_call.method, method_call.argument,
+                                             method_call.IsLastCall());
+        break;
+    case EngineID::KEPLER_COMPUTE_B:
+        channel_state.kepler_compute->CallMethod(method_call.method, method_call.argument,
+                                                 method_call.IsLastCall());
+        break;
+    case EngineID::MAXWELL_DMA_COPY_A:
+        channel_state.maxwell_dma->CallMethod(method_call.method, method_call.argument,
+                                              method_call.IsLastCall());
+        break;
+    case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+        channel_state.kepler_memory->CallMethod(method_call.method, method_call.argument,
+                                                method_call.IsLastCall());
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented engine"); // no known engine recorded for the subchannel
+    }
+}
+
+/// Calls a GPU engine multivalue method on the engine recorded for the given subchannel.
+void Puller::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+                                   u32 methods_pending) {
+    const EngineID engine = bound_engines[subchannel];
+
+    switch (engine) {
+    case EngineID::FERMI_TWOD_A:
+        channel_state.fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
+        break;
+    case EngineID::MAXWELL_B:
+        channel_state.maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
+        break;
+    case EngineID::KEPLER_COMPUTE_B:
+        channel_state.kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
+        break;
+    case EngineID::MAXWELL_DMA_COPY_A:
+        channel_state.maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
+        break;
+    case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+        channel_state.kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented engine"); // no known engine recorded for the subchannel
+    }
+}
+
+/// Calls a GPU method, routing it to the bound engine or to the puller by its method index.
+void Puller::CallMethod(const MethodCall& method_call) {
+    LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
+              method_call.subchannel);
+
+    ASSERT(method_call.subchannel < bound_engines.size());
+
+    if (ExecuteMethodOnEngine(method_call.method)) {
+        CallEngineMethod(method_call);
+    } else {
+        CallPullerMethod(method_call); // indices below NonPullerMethods are puller methods
+    }
+}
+
+/// Calls a GPU multivalue method: engine methods are forwarded as one batch, puller
+/// methods are replayed one argument at a time.
+void Puller::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+                             u32 methods_pending) {
+    LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
+
+    ASSERT(subchannel < bound_engines.size());
+
+    // Engine methods are handed over in a single call.
+    if (ExecuteMethodOnEngine(method)) {
+        CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
+        return;
+    }
+
+    // Puller methods are issued individually, with the pending count shrinking per argument.
+    for (u32 index = 0; index < amount; ++index) {
+        const u32 remaining = methods_pending - index;
+        CallPullerMethod(MethodCall{method, base_start[index], subchannel, remaining});
+    }
+}
+
+void Puller::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+    rasterizer = rasterizer_; // stored as given; no null check is performed
+}
+
+/// Returns true for methods at or above NonPullerMethods, false for the puller's own methods.
+[[nodiscard]] bool Puller::ExecuteMethodOnEngine(u32 method) {
+    // The raw method index is compared as a BufferMethods enumerator.
+    return static_cast<BufferMethods>(method) >= BufferMethods::NonPullerMethods;
+}
+
+} // namespace Tegra::Engines
diff --git a/src/video_core/engines/puller.h b/src/video_core/engines/puller.h
new file mode 100644
index 000000000..d948ec790
--- /dev/null
+++ b/src/video_core/engines/puller.h
@@ -0,0 +1,179 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <vector>
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/engines/engine_interface.h"
+
+namespace Core {
+class System;
+}
+
+namespace Tegra {
+class MemoryManager;
+class DmaPusher;
+
+enum class EngineID { // engine ids a command subchannel can be bound to
+    FERMI_TWOD_A = 0x902D, // 2D Engine
+    MAXWELL_B = 0xB197,    // 3D Engine
+    KEPLER_COMPUTE_B = 0xB1C0, // Compute Engine
+    KEPLER_INLINE_TO_MEMORY_B = 0xA140, // Inline Memory Engine
+    MAXWELL_DMA_COPY_A = 0xB0B5, // DMA Engine
+};
+
+namespace Control {
+struct ChannelState;
+}
+} // namespace Tegra
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Tegra::Engines {
+
+class Puller final { // routes GPU methods to puller registers or to a subchannel's engine
+public:
+    struct MethodCall {
+        u32 method{};
+        u32 argument{};
+        u32 subchannel{};
+        u32 method_count{}; // calls still pending in the current batch, this one included
+
+        explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
+            : method(method_), argument(argument_), subchannel(subchannel_),
+              method_count(method_count_) {}
+
+        [[nodiscard]] bool IsLastCall() const {
+            return method_count <= 1; // 0 (the default) and 1 both count as the last call
+        }
+    };
+
+    enum class FenceOperation : u32 {
+        Acquire = 0,
+        Increment = 1,
+    };
+
+    union FenceAction { // raw register word overlaid with its bit fields
+        u32 raw;
+        BitField<0, 1, FenceOperation> op;
+        BitField<8, 24, u32> syncpoint_id;
+    };
+
+    explicit Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher,
+                    Control::ChannelState& channel_state);
+    ~Puller();
+
+    void CallMethod(const MethodCall& method_call); // single-argument entry point
+
+    void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+                         u32 methods_pending); // `amount` arguments for one method
+
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
+    void CallPullerMethod(const MethodCall& method_call);
+
+    void CallEngineMethod(const MethodCall& method_call);
+
+    void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+                               u32 methods_pending);
+
+private:
+    Tegra::GPU& gpu; // NOTE(review): GPU is not declared in this header; confirm an include does
+
+    MemoryManager& memory_manager;
+    DmaPusher& dma_pusher;
+    Control::ChannelState& channel_state;
+    VideoCore::RasterizerInterface* rasterizer = nullptr; // set later through BindRasterizer()
+
+    static constexpr std::size_t NUM_REGS = 0x800; // NOTE(review): not referenced in this header
+    struct Regs {
+        static constexpr size_t NUM_REGS = 0x40; // number of words exposed through reg_array
+
+        union {
+            struct {
+                INSERT_PADDING_WORDS_NOINIT(0x4); // words 0x0-0x3
+                struct {
+                    u32 address_high; // upper 32 bits of the semaphore address
+                    u32 address_low;  // lower 32 bits of the semaphore address
+
+                    [[nodiscard]] GPUVAddr SemaphoreAddress() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } semaphore_address;
+
+                u32 semaphore_sequence;
+                u32 semaphore_trigger;
+                INSERT_PADDING_WORDS_NOINIT(0xC); // words 0x8-0x13
+
+                // The pusher and the puller share the reference counter, the pusher only has read
+                // access
+                u32 reference_count;
+                INSERT_PADDING_WORDS_NOINIT(0x5); // words 0x15-0x19
+
+                u32 semaphore_acquire;
+                u32 semaphore_release;
+                u32 fence_value;
+                FenceAction fence_action;
+                INSERT_PADDING_WORDS_NOINIT(0xE2); // words 0x1E-0xFF
+
+                // Puller state
+                u32 acquire_mode;
+                u32 acquire_source;
+                u32 acquire_active;
+                u32 acquire_timeout;
+                u32 acquire_value;
+            };
+            std::array<u32, NUM_REGS> reg_array; // NOTE(review): 0x40 words; acquire_* not covered
+        };
+    } regs{};
+
+    void ProcessBindMethod(const MethodCall& method_call);
+    void ProcessFenceActionMethod();
+    void ProcessSemaphoreAcquire();
+    void ProcessSemaphoreRelease();
+    void ProcessSemaphoreTriggerMethod();
+    void ProcessWaitForInterruptMethod();
+    [[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
+
+    /// Mapping of command subchannels to their bound engine ids
+    std::array<EngineID, 8> bound_engines{};
+
+    enum class GpuSemaphoreOperation {
+        AcquireEqual = 0x1,
+        WriteLong = 0x2,
+        AcquireGequal = 0x4,
+        AcquireMask = 0x8,
+    };
+
+#define ASSERT_REG_POSITION(field_name, position)                                                  \
+    static_assert(offsetof(Regs, field_name) == position * 4,                                      \
+                  "Field " #field_name " has invalid position")
+
+    ASSERT_REG_POSITION(semaphore_address, 0x4); // positions are word indices, scaled by 4 bytes
+    ASSERT_REG_POSITION(semaphore_sequence, 0x6);
+    ASSERT_REG_POSITION(semaphore_trigger, 0x7);
+    ASSERT_REG_POSITION(reference_count, 0x14);
+    ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
+    ASSERT_REG_POSITION(semaphore_release, 0x1B);
+    ASSERT_REG_POSITION(fence_value, 0x1C);
+    ASSERT_REG_POSITION(fence_action, 0x1D);
+
+    ASSERT_REG_POSITION(acquire_mode, 0x100);
+    ASSERT_REG_POSITION(acquire_source, 0x101);
+    ASSERT_REG_POSITION(acquire_active, 0x102);
+    ASSERT_REG_POSITION(acquire_timeout, 0x103);
+    ASSERT_REG_POSITION(acquire_value, 0x104);
+
+#undef ASSERT_REG_POSITION
+};
+
+} // namespace Tegra::Engines
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 1e9832ddd..d658e038d 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -4,12 +4,13 @@
 #pragma once
 
 #include <algorithm>
+#include <cstring>
+#include <memory>
 #include <queue>
 
 #include "common/common_types.h"
 #include "video_core/delayed_destruction_ring.h"
 #include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 
 namespace VideoCommon {
@@ -19,10 +20,10 @@ public:
     explicit FenceBase(u32 payload_, bool is_stubbed_)
         : address{}, payload{payload_}, is_semaphore{false}, is_stubbed{is_stubbed_} {}
 
-    explicit FenceBase(GPUVAddr address_, u32 payload_, bool is_stubbed_)
+    explicit FenceBase(u8* address_, u32 payload_, bool is_stubbed_) // semaphore-type fence
         : address{address_}, payload{payload_}, is_semaphore{true}, is_stubbed{is_stubbed_} {}
 
-    GPUVAddr GetAddress() const {
+    u8* GetAddress() const { // where a semaphore payload is memcpy'd on release; null otherwise
         return address;
     }
 
@@ -35,7 +36,7 @@ public:
     }
 
 private:
-    GPUVAddr address;
+    u8* address;
     u32 payload;
     bool is_semaphore;
 
@@ -57,7 +58,7 @@ public:
         buffer_cache.AccumulateFlushes();
     }
 
-    void SignalSemaphore(GPUVAddr addr, u32 value) {
+    void SignalSemaphore(u8* addr, u32 value) {
         TryReleasePendingFences();
         const bool should_flush = ShouldFlush();
         CommitAsyncFlushes();
@@ -91,8 +92,9 @@ public:
             }
             PopAsyncFlushes();
             if (current_fence->IsSemaphore()) {
-                gpu_memory.template Write<u32>(current_fence->GetAddress(),
-                                               current_fence->GetPayload());
+                char* address = reinterpret_cast<char*>(current_fence->GetAddress());
+                auto payload = current_fence->GetPayload();
+                std::memcpy(address, &payload, sizeof(payload));
             } else {
                 gpu.IncrementSyncPoint(current_fence->GetPayload());
             }
@@ -104,8 +106,8 @@ protected:
     explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
                           TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
                           TQueryCache& query_cache_)
-        : rasterizer{rasterizer_}, gpu{gpu_}, gpu_memory{gpu.MemoryManager()},
-          texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
+        : rasterizer{rasterizer_}, gpu{gpu_}, texture_cache{texture_cache_},
+          buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
 
     virtual ~FenceManager() = default;
 
@@ -113,7 +115,7 @@ protected:
     /// true
     virtual TFence CreateFence(u32 value, bool is_stubbed) = 0;
     /// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true
-    virtual TFence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) = 0;
+    virtual TFence CreateFence(u8* addr, u32 value, bool is_stubbed) = 0;
     /// Queues a fence into the backend if the fence isn't stubbed.
     virtual void QueueFence(TFence& fence) = 0;
     /// Notifies that the backend fence has been signaled/reached in host GPU.
@@ -123,7 +125,6 @@ protected:
 
     VideoCore::RasterizerInterface& rasterizer;
     Tegra::GPU& gpu;
-    Tegra::MemoryManager& gpu_memory;
     TTextureCache& texture_cache;
     TTBufferCache& buffer_cache;
     TQueryCache& query_cache;
@@ -137,8 +138,9 @@ private:
             }
             PopAsyncFlushes();
             if (current_fence->IsSemaphore()) {
-                gpu_memory.template Write<u32>(current_fence->GetAddress(),
-                                               current_fence->GetPayload());
+                char* address = reinterpret_cast<char*>(current_fence->GetAddress());
+                const auto payload = current_fence->GetPayload();
+                std::memcpy(address, &payload, sizeof(payload));
             } else {
                 gpu.IncrementSyncPoint(current_fence->GetPayload());
             }
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 33431f2a0..80a1c69e0 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -18,6 +18,8 @@
 #include "core/hle/service/nvdrv/nvdata.h"
 #include "core/perf_stats.h"
 #include "video_core/cdma_pusher.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/control/scheduler.h"
 #include "video_core/dma_pusher.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/engines/kepler_compute.h"
@@ -36,65 +38,58 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
 
 struct GPU::Impl {
     explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
-        : gpu{gpu_}, system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(
-                                          system)},
-          dma_pusher{std::make_unique<Tegra::DmaPusher>(system, gpu)}, use_nvdec{use_nvdec_},
-          maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
-          fermi_2d{std::make_unique<Engines::Fermi2D>()},
-          kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
-          maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
-          kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
+        : gpu{gpu_}, system{system_}, use_nvdec{use_nvdec_},
           shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
-          gpu_thread{system_, is_async_} {}
+          gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {}
 
     ~Impl() = default;
 
-    /// Binds a renderer to the GPU.
-    void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
-        renderer = std::move(renderer_);
-        rasterizer = renderer->ReadRasterizer();
-
-        memory_manager->BindRasterizer(rasterizer);
-        maxwell_3d->BindRasterizer(rasterizer);
-        fermi_2d->BindRasterizer(rasterizer);
-        kepler_compute->BindRasterizer(rasterizer);
-        kepler_memory->BindRasterizer(rasterizer);
-        maxwell_dma->BindRasterizer(rasterizer);
+    std::shared_ptr<Control::ChannelState> CreateChannel(s32 channel_id) {
+        auto channel_state = std::make_shared<Tegra::Control::ChannelState>(channel_id);
+        channels.emplace(channel_id, channel_state); // inserts nothing if the id already exists
+        scheduler->DeclareChannel(channel_state); // the scheduler is told unconditionally
+        return channel_state;
     }
 
-    /// Calls a GPU method.
-    void CallMethod(const GPU::MethodCall& method_call) {
-        LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
-                  method_call.subchannel);
+    void BindChannel(s32 channel_id) {
+        if (bound_channel == channel_id) { // already bound: nothing to do
+            return;
+        }
+        auto it = channels.find(channel_id);
+        ASSERT(it != channels.end()); // the channel must have been created first
+        bound_channel = channel_id;
+        current_channel = it->second.get(); // raw view; the map's shared_ptr keeps it alive
 
-        ASSERT(method_call.subchannel < bound_engines.size());
+        rasterizer->BindChannel(*current_channel); // NOTE(review): needs BindRenderer() first
+    }
 
-        if (ExecuteMethodOnEngine(method_call.method)) {
-            CallEngineMethod(method_call);
-        } else {
-            CallPullerMethod(method_call);
-        }
+    std::shared_ptr<Control::ChannelState> AllocateChannel() {
+        return CreateChannel(new_channel_id++); // ids start at 1; 0 is the host1x channel
     }
 
-    /// Calls a GPU multivalue method.
-    void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                         u32 methods_pending) {
-        LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
+    void InitChannel(Control::ChannelState& to_init) {
+        to_init.Init(system, gpu);
+        to_init.BindRasterizer(rasterizer);
+        rasterizer->InitializeChannel(to_init); // NOTE(review): needs BindRenderer() first
+    }
 
-        ASSERT(subchannel < bound_engines.size());
+    void ReleaseChannel(Control::ChannelState& to_release) {
+        UNIMPLEMENTED(); // channel teardown is not implemented; to_release is unused
+    }
 
-        if (ExecuteMethodOnEngine(method)) {
-            CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
-        } else {
-            for (std::size_t i = 0; i < amount; i++) {
-                CallPullerMethod(GPU::MethodCall{
-                    method,
-                    base_start[i],
-                    subchannel,
-                    methods_pending - static_cast<u32>(i),
-                });
-            }
+    void CreateHost1xChannel() {
+        if (host1x_channel) { // created at most once
+            return;
         }
+        host1x_channel = CreateChannel(0); // id 0; AllocateChannel() hands out ids from 1
+        host1x_channel->memory_manager = std::make_shared<Tegra::MemoryManager>(system);
+        InitChannel(*host1x_channel);
+    }
+
+    /// Binds a renderer to the GPU and caches the rasterizer pointer it exposes.
+    void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
+        renderer = std::move(renderer_);
+        rasterizer = renderer->ReadRasterizer(); // later handed to channels by InitChannel()
     }
 
     /// Flush all current written commands into the host GPU for execution.
@@ -146,42 +141,44 @@ struct GPU::Impl {
 
     /// Returns a reference to the Maxwell3D GPU engine.
     [[nodiscard]] Engines::Maxwell3D& Maxwell3D() {
-        return *maxwell_3d;
+        ASSERT(current_channel); // a channel must have been bound via BindChannel()
+        return *current_channel->maxwell_3d;
     }
 
     /// Returns a const reference to the Maxwell3D GPU engine.
     [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const {
-        return *maxwell_3d;
+        ASSERT(current_channel); // a channel must have been bound via BindChannel()
+        return *current_channel->maxwell_3d;
     }
 
     /// Returns a reference to the KeplerCompute GPU engine.
     [[nodiscard]] Engines::KeplerCompute& KeplerCompute() {
-        return *kepler_compute;
+        ASSERT(current_channel); // a channel must have been bound via BindChannel()
+        return *current_channel->kepler_compute;
     }
 
     /// Returns a reference to the KeplerCompute GPU engine.
     [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const {
-        return *kepler_compute;
+        ASSERT(current_channel); // a channel must have been bound via BindChannel()
+        return *current_channel->kepler_compute;
     }
 
     /// Returns a reference to the GPU memory manager.
     [[nodiscard]] Tegra::MemoryManager& MemoryManager() {
-        return *memory_manager;
-    }
-
-    /// Returns a const reference to the GPU memory manager.
-    [[nodiscard]] const Tegra::MemoryManager& MemoryManager() const {
-        return *memory_manager;
+        CreateHost1xChannel(); // creates the host1x channel on first use
+        return *host1x_channel->memory_manager; // host1x manager, whatever channel is bound
     }
 
     /// Returns a reference to the GPU DMA pusher.
     [[nodiscard]] Tegra::DmaPusher& DmaPusher() {
-        return *dma_pusher;
+        ASSERT(current_channel); // a channel must have been bound via BindChannel()
+        return *current_channel->dma_pusher;
     }
 
     /// Returns a const reference to the GPU DMA pusher.
     [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const {
-        return *dma_pusher;
+        ASSERT(current_channel); // a channel must have been bound via BindChannel()
+        return *current_channel->dma_pusher;
     }
 
     /// Returns a reference to the underlying renderer.
@@ -306,7 +303,7 @@ struct GPU::Impl {
     /// This can be used to launch any necessary threads and register any necessary
     /// core timing events.
     void Start() {
-        gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher);
+        gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler);
         cpu_context = renderer->GetRenderWindow().CreateSharedContext();
         cpu_context->MakeCurrent();
     }
@@ -328,8 +325,8 @@ struct GPU::Impl {
     }
 
     /// Push GPU command entries to be processed
-    void PushGPUEntries(Tegra::CommandList&& entries) {
-        gpu_thread.SubmitList(std::move(entries));
+    void PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
+        gpu_thread.SubmitList(channel, std::move(entries)); // list is tagged with its channel id
     }
 
     /// Push GPU command buffer entries to be processed
@@ -381,303 +378,16 @@ struct GPU::Impl {
         interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
     }
 
-    void ProcessBindMethod(const GPU::MethodCall& method_call) {
-        // Bind the current subchannel to the desired engine id.
-        LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
-                  method_call.argument);
-        const auto engine_id = static_cast<EngineID>(method_call.argument);
-        bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
-        switch (engine_id) {
-        case EngineID::FERMI_TWOD_A:
-            dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel);
-            break;
-        case EngineID::MAXWELL_B:
-            dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel);
-            break;
-        case EngineID::KEPLER_COMPUTE_B:
-            dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel);
-            break;
-        case EngineID::MAXWELL_DMA_COPY_A:
-            dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel);
-            break;
-        case EngineID::KEPLER_INLINE_TO_MEMORY_B:
-            dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
-        }
-    }
-
-    void ProcessFenceActionMethod() {
-        switch (regs.fence_action.op) {
-        case GPU::FenceOperation::Acquire:
-            WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
-            break;
-        case GPU::FenceOperation::Increment:
-            IncrementSyncPoint(regs.fence_action.syncpoint_id);
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
-        }
-    }
-
-    void ProcessWaitForInterruptMethod() {
-        // TODO(bunnei) ImplementMe
-        LOG_WARNING(HW_GPU, "(STUBBED) called");
-    }
-
-    void ProcessSemaphoreTriggerMethod() {
-        const auto semaphoreOperationMask = 0xF;
-        const auto op =
-            static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
-        if (op == GpuSemaphoreOperation::WriteLong) {
-            struct Block {
-                u32 sequence;
-                u32 zeros = 0;
-                u64 timestamp;
-            };
-
-            Block block{};
-            block.sequence = regs.semaphore_sequence;
-            // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
-            // CoreTiming
-            block.timestamp = GetTicks();
-            memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
-                                       sizeof(block));
-        } else {
-            const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
-            if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
-                (op == GpuSemaphoreOperation::AcquireGequal &&
-                 static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
-                (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
-                // Nothing to do in this case
-            } else {
-                regs.acquire_source = true;
-                regs.acquire_value = regs.semaphore_sequence;
-                if (op == GpuSemaphoreOperation::AcquireEqual) {
-                    regs.acquire_active = true;
-                    regs.acquire_mode = false;
-                } else if (op == GpuSemaphoreOperation::AcquireGequal) {
-                    regs.acquire_active = true;
-                    regs.acquire_mode = true;
-                } else if (op == GpuSemaphoreOperation::AcquireMask) {
-                    // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
-                    // semaphore_sequence, gives a non-0 result
-                    LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
-                } else {
-                    LOG_ERROR(HW_GPU, "Invalid semaphore operation");
-                }
-            }
-        }
-    }
-
-    void ProcessSemaphoreRelease() {
-        memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(),
-                                   regs.semaphore_release);
-    }
-
-    void ProcessSemaphoreAcquire() {
-        const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress());
-        const auto value = regs.semaphore_acquire;
-        if (word != value) {
-            regs.acquire_active = true;
-            regs.acquire_value = value;
-            // TODO(kemathe73) figure out how to do the acquire_timeout
-            regs.acquire_mode = false;
-            regs.acquire_source = false;
-        }
-    }
-
-    /// Calls a GPU puller method.
-    void CallPullerMethod(const GPU::MethodCall& method_call) {
-        regs.reg_array[method_call.method] = method_call.argument;
-        const auto method = static_cast<BufferMethods>(method_call.method);
-
-        switch (method) {
-        case BufferMethods::BindObject: {
-            ProcessBindMethod(method_call);
-            break;
-        }
-        case BufferMethods::Nop:
-        case BufferMethods::SemaphoreAddressHigh:
-        case BufferMethods::SemaphoreAddressLow:
-        case BufferMethods::SemaphoreSequence:
-            break;
-        case BufferMethods::UnkCacheFlush:
-            rasterizer->SyncGuestHost();
-            break;
-        case BufferMethods::WrcacheFlush:
-            rasterizer->SignalReference();
-            break;
-        case BufferMethods::FenceValue:
-            break;
-        case BufferMethods::RefCnt:
-            rasterizer->SignalReference();
-            break;
-        case BufferMethods::FenceAction:
-            ProcessFenceActionMethod();
-            break;
-        case BufferMethods::WaitForInterrupt:
-            rasterizer->WaitForIdle();
-            break;
-        case BufferMethods::SemaphoreTrigger: {
-            ProcessSemaphoreTriggerMethod();
-            break;
-        }
-        case BufferMethods::NotifyIntr: {
-            // TODO(Kmather73): Research and implement this method.
-            LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
-            break;
-        }
-        case BufferMethods::Unk28: {
-            // TODO(Kmather73): Research and implement this method.
-            LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
-            break;
-        }
-        case BufferMethods::SemaphoreAcquire: {
-            ProcessSemaphoreAcquire();
-            break;
-        }
-        case BufferMethods::SemaphoreRelease: {
-            ProcessSemaphoreRelease();
-            break;
-        }
-        case BufferMethods::Yield: {
-            // TODO(Kmather73): Research and implement this method.
-            LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
-            break;
-        }
-        default:
-            LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
-            break;
-        }
-    }
-
-    /// Calls a GPU engine method.
-    void CallEngineMethod(const GPU::MethodCall& method_call) {
-        const EngineID engine = bound_engines[method_call.subchannel];
-
-        switch (engine) {
-        case EngineID::FERMI_TWOD_A:
-            fermi_2d->CallMethod(method_call.method, method_call.argument,
-                                 method_call.IsLastCall());
-            break;
-        case EngineID::MAXWELL_B:
-            maxwell_3d->CallMethod(method_call.method, method_call.argument,
-                                   method_call.IsLastCall());
-            break;
-        case EngineID::KEPLER_COMPUTE_B:
-            kepler_compute->CallMethod(method_call.method, method_call.argument,
-                                       method_call.IsLastCall());
-            break;
-        case EngineID::MAXWELL_DMA_COPY_A:
-            maxwell_dma->CallMethod(method_call.method, method_call.argument,
-                                    method_call.IsLastCall());
-            break;
-        case EngineID::KEPLER_INLINE_TO_MEMORY_B:
-            kepler_memory->CallMethod(method_call.method, method_call.argument,
-                                      method_call.IsLastCall());
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented engine");
-        }
-    }
-
-    /// Calls a GPU engine multivalue method.
-    void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                               u32 methods_pending) {
-        const EngineID engine = bound_engines[subchannel];
-
-        switch (engine) {
-        case EngineID::FERMI_TWOD_A:
-            fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
-            break;
-        case EngineID::MAXWELL_B:
-            maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
-            break;
-        case EngineID::KEPLER_COMPUTE_B:
-            kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
-            break;
-        case EngineID::MAXWELL_DMA_COPY_A:
-            maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
-            break;
-        case EngineID::KEPLER_INLINE_TO_MEMORY_B:
-            kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented engine");
-        }
-    }
-
-    /// Determines where the method should be executed.
-    [[nodiscard]] bool ExecuteMethodOnEngine(u32 method) {
-        const auto buffer_method = static_cast<BufferMethods>(method);
-        return buffer_method >= BufferMethods::NonPullerMethods;
-    }
-
-    struct Regs {
-        static constexpr size_t NUM_REGS = 0x40;
-
-        union {
-            struct {
-                INSERT_PADDING_WORDS_NOINIT(0x4);
-                struct {
-                    u32 address_high;
-                    u32 address_low;
-
-                    [[nodiscard]] GPUVAddr SemaphoreAddress() const {
-                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
-                                                     address_low);
-                    }
-                } semaphore_address;
-
-                u32 semaphore_sequence;
-                u32 semaphore_trigger;
-                INSERT_PADDING_WORDS_NOINIT(0xC);
-
-                // The pusher and the puller share the reference counter, the pusher only has read
-                // access
-                u32 reference_count;
-                INSERT_PADDING_WORDS_NOINIT(0x5);
-
-                u32 semaphore_acquire;
-                u32 semaphore_release;
-                u32 fence_value;
-                GPU::FenceAction fence_action;
-                INSERT_PADDING_WORDS_NOINIT(0xE2);
-
-                // Puller state
-                u32 acquire_mode;
-                u32 acquire_source;
-                u32 acquire_active;
-                u32 acquire_timeout;
-                u32 acquire_value;
-            };
-            std::array<u32, NUM_REGS> reg_array;
-        };
-    } regs{};
-
     GPU& gpu;
     Core::System& system;
-    std::unique_ptr<Tegra::MemoryManager> memory_manager;
-    std::unique_ptr<Tegra::DmaPusher> dma_pusher;
+
     std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
     std::unique_ptr<VideoCore::RendererBase> renderer;
     VideoCore::RasterizerInterface* rasterizer = nullptr;
     const bool use_nvdec;
 
-    /// Mapping of command subchannels to their bound engine ids
-    std::array<EngineID, 8> bound_engines{};
-    /// 3D engine
-    std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
-    /// 2D engine
-    std::unique_ptr<Engines::Fermi2D> fermi_2d;
-    /// Compute engine
-    std::unique_ptr<Engines::KeplerCompute> kepler_compute;
-    /// DMA engine
-    std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
-    /// Inline memory engine
-    std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+    std::shared_ptr<Control::ChannelState> host1x_channel;
+    s32 new_channel_id{1};
     /// Shader build notifier
     std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
     /// When true, we are about to shut down emulation session, so terminate outstanding tasks
@@ -710,33 +420,10 @@ struct GPU::Impl {
     VideoCommon::GPUThread::ThreadManager gpu_thread;
     std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
 
-#define ASSERT_REG_POSITION(field_name, position)                                                  \
-    static_assert(offsetof(Regs, field_name) == position * 4,                                      \
-                  "Field " #field_name " has invalid position")
-
-    ASSERT_REG_POSITION(semaphore_address, 0x4);
-    ASSERT_REG_POSITION(semaphore_sequence, 0x6);
-    ASSERT_REG_POSITION(semaphore_trigger, 0x7);
-    ASSERT_REG_POSITION(reference_count, 0x14);
-    ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
-    ASSERT_REG_POSITION(semaphore_release, 0x1B);
-    ASSERT_REG_POSITION(fence_value, 0x1C);
-    ASSERT_REG_POSITION(fence_action, 0x1D);
-
-    ASSERT_REG_POSITION(acquire_mode, 0x100);
-    ASSERT_REG_POSITION(acquire_source, 0x101);
-    ASSERT_REG_POSITION(acquire_active, 0x102);
-    ASSERT_REG_POSITION(acquire_timeout, 0x103);
-    ASSERT_REG_POSITION(acquire_value, 0x104);
-
-#undef ASSERT_REG_POSITION
-
-    enum class GpuSemaphoreOperation {
-        AcquireEqual = 0x1,
-        WriteLong = 0x2,
-        AcquireGequal = 0x4,
-        AcquireMask = 0x8,
-    };
+    std::unique_ptr<Tegra::Control::Scheduler> scheduler; // receives every created channel
+    std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels;
+    Tegra::Control::ChannelState* current_channel = nullptr; // null until BindChannel() runs
+    s32 bound_channel{-1}; // id of current_channel; -1 means no channel is bound
 };
 
 GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
@@ -744,17 +431,24 @@ GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
 
 GPU::~GPU() = default;
 
-void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) {
-    impl->BindRenderer(std::move(renderer));
+std::shared_ptr<Control::ChannelState> GPU::AllocateChannel() {
+    return impl->AllocateChannel();
+}
+
+void GPU::InitChannel(Control::ChannelState& to_init) {
+    impl->InitChannel(to_init);
 }
 
-void GPU::CallMethod(const MethodCall& method_call) {
-    impl->CallMethod(method_call);
+void GPU::BindChannel(s32 channel_id) {
+    impl->BindChannel(channel_id);
 }
 
-void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                          u32 methods_pending) {
-    impl->CallMultiMethod(method, subchannel, base_start, amount, methods_pending);
+void GPU::ReleaseChannel(Control::ChannelState& to_release) {
+    impl->ReleaseChannel(to_release);
+}
+
+void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) {
+    impl->BindRenderer(std::move(renderer));
 }
 
 void GPU::FlushCommands() {
@@ -881,8 +575,8 @@ void GPU::ReleaseContext() {
     impl->ReleaseContext();
 }
 
-void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
-    impl->PushGPUEntries(std::move(entries));
+void GPU::PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
+    impl->PushGPUEntries(channel, std::move(entries));
 }
 
 void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 42c91954f..74d55e074 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -89,57 +89,20 @@ class Maxwell3D;
 class KeplerCompute;
 } // namespace Engines
 
-enum class EngineID {
-    FERMI_TWOD_A = 0x902D, // 2D Engine
-    MAXWELL_B = 0xB197,    // 3D Engine
-    KEPLER_COMPUTE_B = 0xB1C0,
-    KEPLER_INLINE_TO_MEMORY_B = 0xA140,
-    MAXWELL_DMA_COPY_A = 0xB0B5,
-};
+namespace Control {
+struct ChannelState;
+}
 
 class MemoryManager;
 
 class GPU final {
 public:
-    struct MethodCall {
-        u32 method{};
-        u32 argument{};
-        u32 subchannel{};
-        u32 method_count{};
-
-        explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
-            : method(method_), argument(argument_), subchannel(subchannel_),
-              method_count(method_count_) {}
-
-        [[nodiscard]] bool IsLastCall() const {
-            return method_count <= 1;
-        }
-    };
-
-    enum class FenceOperation : u32 {
-        Acquire = 0,
-        Increment = 1,
-    };
-
-    union FenceAction {
-        u32 raw;
-        BitField<0, 1, FenceOperation> op;
-        BitField<8, 24, u32> syncpoint_id;
-    };
-
     explicit GPU(Core::System& system, bool is_async, bool use_nvdec);
     ~GPU();
 
     /// Binds a renderer to the GPU.
     void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);
 
-    /// Calls a GPU method.
-    void CallMethod(const MethodCall& method_call);
-
-    /// Calls a GPU multivalue method.
-    void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                         u32 methods_pending);
-
     /// Flush all current written commands into the host GPU for execution.
     void FlushCommands();
     /// Synchronizes CPU writes with Host GPU memory.
@@ -147,6 +110,14 @@ public:
     /// Signal the ending of command list.
     void OnCommandListEnd();
 
+    std::shared_ptr<Control::ChannelState> AllocateChannel();
+
+    void InitChannel(Control::ChannelState& to_init);
+
+    void BindChannel(s32 channel_id);
+
+    void ReleaseChannel(Control::ChannelState& to_release);
+
     /// Request a host GPU memory flush from the CPU.
     [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
 
@@ -226,7 +197,7 @@ public:
     void ReleaseContext();
 
     /// Push GPU command entries to be processed
-    void PushGPUEntries(Tegra::CommandList&& entries);
+    void PushGPUEntries(s32 channel, Tegra::CommandList&& entries);
 
     /// Push GPU command buffer entries to be processed
     void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries);
@@ -248,7 +219,7 @@ public:
 
 private:
     struct Impl;
-    std::unique_ptr<Impl> impl;
+    mutable std::unique_ptr<Impl> impl;
 };
 
 } // namespace Tegra
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index f0e48cfbd..9844cde43 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -8,6 +8,7 @@
 #include "common/thread.h"
 #include "core/core.h"
 #include "core/frontend/emu_window.h"
+#include "video_core/control/scheduler.h"
 #include "video_core/dma_pusher.h"
 #include "video_core/gpu.h"
 #include "video_core/gpu_thread.h"
@@ -18,7 +19,7 @@ namespace VideoCommon::GPUThread {
 /// Runs the GPU thread
 static void RunThread(std::stop_token stop_token, Core::System& system,
                       VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
-                      Tegra::DmaPusher& dma_pusher, SynchState& state) {
+                      Tegra::Control::Scheduler& scheduler, SynchState& state) {
     std::string name = "GPU";
     MicroProfileOnThreadCreate(name.c_str());
     SCOPE_EXIT({ MicroProfileOnThreadExit(); });
@@ -36,8 +37,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
             break;
         }
         if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) {
-            dma_pusher.Push(std::move(submit_list->entries));
-            dma_pusher.DispatchCalls();
+            scheduler.Push(submit_list->channel, std::move(submit_list->entries));
         } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) {
             renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
         } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
@@ -68,14 +68,14 @@ ThreadManager::~ThreadManager() = default;
 
 void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
                                 Core::Frontend::GraphicsContext& context,
-                                Tegra::DmaPusher& dma_pusher) {
+                                Tegra::Control::Scheduler& scheduler) {
     rasterizer = renderer.ReadRasterizer();
     thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
-                          std::ref(dma_pusher), std::ref(state));
+                          std::ref(scheduler), std::ref(state));
 }
 
-void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
-    PushCommand(SubmitListCommand(std::move(entries)));
+void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) {
+    PushCommand(SubmitListCommand(channel, std::move(entries)));
 }
 
 void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 2f8210cb9..c5078a2b3 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -15,7 +15,9 @@
 
 namespace Tegra {
 struct FramebufferConfig;
-class DmaPusher;
+namespace Control {
+class Scheduler;
+}
 } // namespace Tegra
 
 namespace Core {
@@ -34,8 +36,10 @@ namespace VideoCommon::GPUThread {
 
 /// Command to signal to the GPU thread that a command list is ready for processing
 struct SubmitListCommand final {
-    explicit SubmitListCommand(Tegra::CommandList&& entries_) : entries{std::move(entries_)} {}
+    explicit SubmitListCommand(s32 channel_, Tegra::CommandList&& entries_)
+        : channel{channel_}, entries{std::move(entries_)} {}
 
+    s32 channel;
     Tegra::CommandList entries;
 };
 
@@ -112,10 +116,10 @@ public:
 
     /// Creates and starts the GPU thread.
     void StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
-                     Tegra::DmaPusher& dma_pusher);
+                     Tegra::Control::Scheduler& scheduler);
 
     /// Push GPU command entries to be processed
-    void SubmitList(Tegra::CommandList&& entries);
+    void SubmitList(s32 channel, Tegra::CommandList&& entries);
 
     /// Swap buffers (render frame)
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index bf9eb735d..a3efd365e 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -133,11 +133,6 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
     // TryLockPage(page_entry, size);
     auto& current_page = page_table[PageEntryIndex(gpu_addr)];
 
-    if ((!current_page.IsValid() && page_entry.IsValid()) ||
-        current_page.ToAddress() != page_entry.ToAddress()) {
-        rasterizer->ModifyGPUMemory(gpu_addr, size);
-    }
-
     current_page = page_entry;
 }
 
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 889b606b3..eb68ea638 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -17,6 +17,7 @@
 
 #include "common/assert.h"
 #include "common/settings.h"
+#include "video_core/control/channel_state_cache.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
@@ -90,13 +91,10 @@ private:
 };
 
 template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
-class QueryCacheBase {
+class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
 public:
-    explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_,
-                            Tegra::Engines::Maxwell3D& maxwell3d_,
-                            Tegra::MemoryManager& gpu_memory_)
-        : rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
-          gpu_memory{gpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
+    explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_)
+        : rasterizer{rasterizer_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
                                                           VideoCore::QueryType::SamplesPassed}}} {}
 
     void InvalidateRegion(VAddr addr, std::size_t size) {
@@ -117,13 +115,13 @@ public:
      */
     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
         std::unique_lock lock{mutex};
-        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+        const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
         ASSERT(cpu_addr);
 
         CachedQuery* query = TryGet(*cpu_addr);
         if (!query) {
             ASSERT_OR_EXECUTE(cpu_addr, return;);
-            u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
+            u8* const host_ptr = gpu_memory->GetPointer(gpu_addr);
 
             query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
         }
@@ -137,7 +135,7 @@ public:
     /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
     void UpdateCounters() {
         std::unique_lock lock{mutex};
-        const auto& regs = maxwell3d.regs;
+        const auto& regs = maxwell3d->regs;
         Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
     }
 
@@ -264,8 +262,6 @@ private:
     static constexpr unsigned YUZU_PAGEBITS = 12;
 
     VideoCore::RasterizerInterface& rasterizer;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::MemoryManager& gpu_memory;
 
     std::recursive_mutex mutex;
 
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index a04a76481..8dacb2626 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -16,6 +16,9 @@ class MemoryManager;
 namespace Engines {
 class AccelerateDMAInterface;
 }
+namespace Control {
+struct ChannelState;
+}
 } // namespace Tegra
 
 namespace VideoCore {
@@ -137,5 +140,11 @@ public:
     /// Initialize disk cached resources for the game being emulated
     virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
                                    const DiskResourceLoadCallback& callback) {}
+
+    virtual void InitializeChannel(Tegra::Control::ChannelState& channel) {}
+
+    virtual void BindChannel(Tegra::Control::ChannelState& channel) {}
+
+    virtual void ReleaseChannel(s32 channel_id) {}
 };
 } // namespace VideoCore
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 6e82c2e28..c76446b60 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -12,7 +12,7 @@ namespace OpenGL {
 
 GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {}
 
-GLInnerFence::GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_)
+GLInnerFence::GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_)
     : FenceBase{address_, payload_, is_stubbed_} {}
 
 GLInnerFence::~GLInnerFence() = default;
@@ -52,7 +52,7 @@ Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
     return std::make_shared<GLInnerFence>(value, is_stubbed);
 }
 
-Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
+Fence FenceManagerOpenGL::CreateFence(u8* addr, u32 value, bool is_stubbed) {
     return std::make_shared<GLInnerFence>(addr, value, is_stubbed);
 }
 
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index 14ff00db2..fced8d002 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -17,7 +17,7 @@ namespace OpenGL {
 class GLInnerFence : public VideoCommon::FenceBase {
 public:
     explicit GLInnerFence(u32 payload_, bool is_stubbed_);
-    explicit GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_);
+    explicit GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_);
     ~GLInnerFence();
 
     void Queue();
@@ -41,7 +41,7 @@ public:
 
 protected:
     Fence CreateFence(u32 value, bool is_stubbed) override;
-    Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
+    Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override;
     void QueueFence(Fence& fence) override;
     bool IsFenceSignaled(Fence& fence) const override;
     void WaitFence(Fence& fence) override;
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index ed40f5791..5070db441 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -26,9 +26,8 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
 
 } // Anonymous namespace
 
-QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
-                       Tegra::MemoryManager& gpu_memory_)
-    : QueryCacheBase(rasterizer_, maxwell3d_, gpu_memory_), gl_rasterizer{rasterizer_} {}
+QueryCache::QueryCache(RasterizerOpenGL& rasterizer_)
+    : QueryCacheBase(rasterizer_), gl_rasterizer{rasterizer_} {}
 
 QueryCache::~QueryCache() = default;
 
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index 8a49f1ef0..14ce59990 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -28,8 +28,7 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
 class QueryCache final
     : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
 public:
-    explicit QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
-                        Tegra::MemoryManager& gpu_memory_);
+    explicit QueryCache(RasterizerOpenGL& rasterizer_);
     ~QueryCache();
 
     OGLQuery AllocateQuery(VideoCore::QueryType type);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index a0d048b0b..e8d61bd41 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -60,12 +60,11 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
       kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
       screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
       texture_cache_runtime(device, program_manager, state_tracker),
-      texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
-      buffer_cache_runtime(device),
-      buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
-      shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache,
-                   buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()),
-      query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
+      texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device),
+      buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
+      shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
+                   state_tracker, gpu.ShaderNotify()),
+      query_cache(*this), accelerate_dma(buffer_cache),
       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
 
 RasterizerOpenGL::~RasterizerOpenGL() = default;
@@ -392,7 +391,8 @@ void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
         gpu_memory.Write<u32>(addr, value);
         return;
     }
-    fence_manager.SignalSemaphore(addr, value);
+    auto paddr = gpu_memory.GetPointer(addr);
+    fence_manager.SignalSemaphore(paddr, value);
 }
 
 void RasterizerOpenGL::SignalSyncPoint(u32 value) {
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 0b8d8ec92..494581d0d 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -151,16 +151,13 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs
 } // Anonymous namespace
 
 ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
-                         Tegra::Engines::Maxwell3D& maxwell3d_,
-                         Tegra::Engines::KeplerCompute& kepler_compute_,
-                         Tegra::MemoryManager& gpu_memory_, const Device& device_,
-                         TextureCache& texture_cache_, BufferCache& buffer_cache_,
-                         ProgramManager& program_manager_, StateTracker& state_tracker_,
-                         VideoCore::ShaderNotify& shader_notify_)
-    : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
-      emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_},
-      buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_},
-      shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()},
+                         const Device& device_, TextureCache& texture_cache_,
+                         BufferCache& buffer_cache_, ProgramManager& program_manager_,
+                         StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_)
+    : VideoCommon::ShaderCache{rasterizer_}, emu_window{emu_window_}, device{device_},
+      texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_},
+      state_tracker{state_tracker_}, shader_notify{shader_notify_},
+      use_asynchronous_shaders{device.UseAsynchronousShaders()},
       profile{
           .supported_spirv = 0x00010000,
 
@@ -310,7 +307,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {
         current_pipeline = nullptr;
         return nullptr;
     }
-    const auto& regs{maxwell3d.regs};
+    const auto& regs{maxwell3d->regs};
     graphics_key.raw = 0;
     graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
     graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0
@@ -351,13 +348,13 @@ GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const n
     }
     // If something is using depth, we can assume that games are not rendering anything which
     // will be used one time.
-    if (maxwell3d.regs.zeta_enable) {
+    if (maxwell3d->regs.zeta_enable) {
         return nullptr;
     }
     // If games are using a small index count, we can assume these are full screen quads.
     // Usually these shaders are only used once for building textures so we can assume they
     // can't be built async
-    if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
+    if (maxwell3d->regs.index_array.count <= 6 || maxwell3d->regs.vertex_buffer.count <= 6) {
         return pipeline;
     }
     return nullptr;
@@ -368,7 +365,7 @@ ComputePipeline* ShaderCache::CurrentComputePipeline() {
     if (!shader) {
         return nullptr;
     }
-    const auto& qmd{kepler_compute.launch_description};
+    const auto& qmd{kepler_compute->launch_description};
     const ComputePipelineKey key{
         .unique_hash = shader->unique_hash,
         .shared_memory_size = qmd.shared_alloc,
@@ -481,8 +478,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
     }
     auto* const thread_worker{build_in_parallel ? workers.get() : nullptr};
     return std::make_unique<GraphicsPipeline>(
-        device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker,
-        thread_worker, &shader_notify, sources, sources_spirv, infos, key);
+        device, texture_cache, buffer_cache, *gpu_memory, *maxwell3d, program_manager,
+        state_tracker, thread_worker, &shader_notify, sources, sources_spirv, infos, key);
 
 } catch (Shader::Exception& exception) {
     LOG_ERROR(Render_OpenGL, "{}", exception.what());
@@ -491,9 +488,9 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
 
 std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
     const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader) {
-    const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
-    const auto& qmd{kepler_compute.launch_description};
-    ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+    const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
+    const auto& qmd{kepler_compute->launch_description};
+    ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
     env.SetCachedSize(shader->size_bytes);
 
     main_pools.ReleaseContents();
@@ -536,8 +533,8 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
         break;
     }
 
-    return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory,
-                                             kepler_compute, program_manager, program.info, code,
+    return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, *gpu_memory,
+                                             *kepler_compute, program_manager, program.info, code,
                                              code_spirv);
 } catch (Shader::Exception& exception) {
     LOG_ERROR(Render_OpenGL, "{}", exception.what());
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index a14269dea..89f181fe3 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -30,12 +30,9 @@ using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
 class ShaderCache : public VideoCommon::ShaderCache {
 public:
     explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
-                         Tegra::Engines::Maxwell3D& maxwell3d_,
-                         Tegra::Engines::KeplerCompute& kepler_compute_,
-                         Tegra::MemoryManager& gpu_memory_, const Device& device_,
-                         TextureCache& texture_cache_, BufferCache& buffer_cache_,
-                         ProgramManager& program_manager_, StateTracker& state_tracker_,
-                         VideoCore::ShaderNotify& shader_notify_);
+                         const Device& device_, TextureCache& texture_cache_,
+                         BufferCache& buffer_cache_, ProgramManager& program_manager_,
+                         StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_);
     ~ShaderCache();
 
     void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 7c78d0299..68c2bc34c 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -95,20 +95,25 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
                                Core::Frontend::EmuWindow& emu_window,
                                Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
                                std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
-    : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_),
-      cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()),
+    : RendererBase(emu_window, std::move(context_)),
+      telemetry_session(telemetry_session_),
+      cpu_memory(cpu_memory_),
+      gpu(gpu_),
+      library(OpenLibrary()),
       instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
                               true, Settings::values.renderer_debug.GetValue())),
       debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
       surface(CreateSurface(instance, render_window)),
-      device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false),
-      state_tracker(gpu), scheduler(device, state_tracker),
+      device(CreateDevice(instance, dld, *surface)),
+      memory_allocator(device, false),
+      state_tracker(gpu),
+      scheduler(device, state_tracker),
       swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
                 render_window.GetFramebufferLayout().height, false),
       blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
                   screen_info),
-      rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device,
-                 memory_allocator, state_tracker, scheduler) {
+      rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
+                 state_tracker, scheduler) {
     Report();
 } catch (const vk::Exception& exception) {
     LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index c249b34d4..301cbbabe 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -14,7 +14,7 @@ namespace Vulkan {
 InnerFence::InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_)
     : FenceBase{payload_, is_stubbed_}, scheduler{scheduler_} {}
 
-InnerFence::InnerFence(Scheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_)
+InnerFence::InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_)
     : FenceBase{address_, payload_, is_stubbed_}, scheduler{scheduler_} {}
 
 InnerFence::~InnerFence() = default;
@@ -52,7 +52,7 @@ Fence FenceManager::CreateFence(u32 value, bool is_stubbed) {
     return std::make_shared<InnerFence>(scheduler, value, is_stubbed);
 }
 
-Fence FenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
+Fence FenceManager::CreateFence(u8* addr, u32 value, bool is_stubbed) {
     return std::make_shared<InnerFence>(scheduler, addr, value, is_stubbed);
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 7c0bbd80a..ea9e88052 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -26,7 +26,7 @@ class Scheduler;
 class InnerFence : public VideoCommon::FenceBase {
 public:
     explicit InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_);
-    explicit InnerFence(Scheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_);
+    explicit InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_);
     ~InnerFence();
 
     void Queue();
@@ -51,7 +51,7 @@ public:
 
 protected:
     Fence CreateFence(u32 value, bool is_stubbed) override;
-    Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
+    Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override;
     void QueueFence(Fence& fence) override;
     bool IsFenceSignaled(Fence& fence) const override;
     void WaitFence(Fence& fence) override;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index accbfc8e1..b1e0b96c4 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -259,17 +259,15 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c
     return std::memcmp(&rhs, this, Size()) == 0;
 }
 
-PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
-                             Tegra::Engines::KeplerCompute& kepler_compute_,
-                             Tegra::MemoryManager& gpu_memory_, const Device& device_,
+PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device_,
                              Scheduler& scheduler_, DescriptorPool& descriptor_pool_,
                              UpdateDescriptorQueue& update_descriptor_queue_,
                              RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
                              TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_)
-    : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
-      device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
-      update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
-      buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_},
+    : VideoCommon::ShaderCache{rasterizer_}, device{device_}, scheduler{scheduler_},
+      descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_},
+      render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_},
+      texture_cache{texture_cache_}, shader_notify{shader_notify_},
       use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
       workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"),
       serialization_thread(1, "VkPipelineSerialization") {
@@ -337,7 +335,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
         current_pipeline = nullptr;
         return nullptr;
     }
-    graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(),
+    graphics_key.state.Refresh(*maxwell3d, device.IsExtExtendedDynamicStateSupported(),
                                device.IsExtVertexInputDynamicStateSupported());
 
     if (current_pipeline) {
@@ -357,7 +355,7 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
     if (!shader) {
         return nullptr;
     }
-    const auto& qmd{kepler_compute.launch_description};
+    const auto& qmd{kepler_compute->launch_description};
     const ComputePipelineCacheKey key{
         .unique_hash = shader->unique_hash,
         .shared_memory_size = qmd.shared_alloc,
@@ -486,13 +484,13 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const
     }
     // If something is using depth, we can assume that games are not rendering anything which
     // will be used one time.
-    if (maxwell3d.regs.zeta_enable) {
+    if (maxwell3d->regs.zeta_enable) {
         return nullptr;
     }
     // If games are using a small index count, we can assume these are full screen quads.
     // Usually these shaders are only used once for building textures so we can assume they
     // can't be built async
-    if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
+    if (maxwell3d->regs.index_array.count <= 6 || maxwell3d->regs.vertex_buffer.count <= 6) {
         return pipeline;
     }
     return nullptr;
@@ -558,7 +556,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
     }
     Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
     return std::make_unique<GraphicsPipeline>(
-        maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device,
+        *maxwell3d, *gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device,
         descriptor_pool, update_descriptor_queue, thread_worker, statistics, render_pass_cache, key,
         std::move(modules), infos);
 
@@ -592,9 +590,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
 
 std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
     const ComputePipelineCacheKey& key, const ShaderInfo* shader) {
-    const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
-    const auto& qmd{kepler_compute.launch_description};
-    ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+    const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
+    const auto& qmd{kepler_compute->launch_description};
+    ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
     env.SetCachedSize(shader->size_bytes);
 
     main_pools.ReleaseContents();
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 127957dbf..61f9e9366 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -100,10 +100,8 @@ struct ShaderPools {
 
 class PipelineCache : public VideoCommon::ShaderCache {
 public:
-    explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
-                           Tegra::Engines::KeplerCompute& kepler_compute,
-                           Tegra::MemoryManager& gpu_memory, const Device& device,
-                           Scheduler& scheduler, DescriptorPool& descriptor_pool,
+    explicit PipelineCache(RasterizerVulkan& rasterizer, const Device& device, Scheduler& scheduler,
+                           DescriptorPool& descriptor_pool,
                            UpdateDescriptorQueue& update_descriptor_queue,
                            RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
                            TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_);
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 2b859c6b8..393bbdf37 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -65,10 +65,10 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
     usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
 }
 
-QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_,
-                       Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
-                       const Device& device_, Scheduler& scheduler_)
-    : QueryCacheBase{rasterizer_, maxwell3d_, gpu_memory_}, device{device_}, scheduler{scheduler_},
+/// Builds the query cache with a single QueryPool, the one for QueryType::SamplesPassed.
+QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
+                       Scheduler& scheduler_)
+    : QueryCacheBase{rasterizer_}, device{device_}, scheduler{scheduler_},
       query_pools{
           QueryPool{device_, scheduler_, QueryType::SamplesPassed},
       } {}
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
index b0d86c4f8..26762ee09 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -52,9 +52,8 @@ private:
 class QueryCache final
     : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
 public:
-    explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_,
-                        Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
-                        const Device& device_, Scheduler& scheduler_);
+    explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
+                        Scheduler& scheduler_);
     ~QueryCache();
 
     std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 7e40c2df1..5d9ff0589 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -11,6 +11,7 @@
 #include "common/microprofile.h"
 #include "common/scope_exit.h"
 #include "common/settings.h"
+#include "video_core/control/channel_state.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_vulkan/blit_image.h"
@@ -148,14 +149,11 @@ DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instan
 } // Anonymous namespace
 
 RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
-                                   Tegra::MemoryManager& gpu_memory_,
                                    Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_,
                                    const Device& device_, MemoryAllocator& memory_allocator_,
                                    StateTracker& state_tracker_, Scheduler& scheduler_)
-    : RasterizerAccelerated{cpu_memory_}, gpu{gpu_},
-      gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()},
-      screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_},
-      state_tracker{state_tracker_}, scheduler{scheduler_},
+    : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, screen_info{screen_info_}, device{device_},
+      memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_},
       staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
       update_descriptor_queue(device, scheduler),
       blit_image(device, scheduler, state_tracker, descriptor_pool),
@@ -165,14 +163,13 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
                                                        memory_allocator, staging_pool,
                                                        blit_image,       astc_decoder_pass,
                                                        render_pass_cache},
-      texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
+      texture_cache(texture_cache_runtime, *this),
       buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
                            update_descriptor_queue, descriptor_pool),
-      buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
-      pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
-                     descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache,
-                     texture_cache, gpu.ShaderNotify()),
-      query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache},
+      buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
+      pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue,
+                     render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
+      query_cache{*this, device, scheduler}, accelerate_dma{buffer_cache},
       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
       wfi_event(device.GetLogical().CreateEvent()) {
     scheduler.SetQueryCache(query_cache);
@@ -199,8 +196,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 
     UpdateDynamicStates();
 
-    const auto& regs{maxwell3d.regs};
-    const u32 num_instances{maxwell3d.mme_draw.instance_count};
+    const auto& regs{maxwell3d->regs};
+    const u32 num_instances{maxwell3d->mme_draw.instance_count};
     const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)};
     scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
         if (draw_params.is_indexed) {
@@ -218,14 +215,14 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 void RasterizerVulkan::Clear() {
     MICROPROFILE_SCOPE(Vulkan_Clearing);
 
-    if (!maxwell3d.ShouldExecute()) {
+    if (!maxwell3d->ShouldExecute()) {
         return;
     }
     FlushWork();
 
     query_cache.UpdateCounters();
 
-    auto& regs = maxwell3d.regs;
+    auto& regs = maxwell3d->regs;
     const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
                            regs.clear_buffers.A;
     const bool use_depth = regs.clear_buffers.Z;
@@ -339,9 +336,9 @@ void RasterizerVulkan::DispatchCompute() {
         return;
     }
     std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
-    pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache);
+    pipeline->Configure(*kepler_compute, *gpu_memory, scheduler, buffer_cache, texture_cache);
 
-    const auto& qmd{kepler_compute.launch_description};
+    const auto& qmd{kepler_compute->launch_description};
     const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); });
@@ -451,10 +448,11 @@ void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) {
 
 void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
     if (!gpu.IsAsync()) {
-        gpu_memory.Write<u32>(addr, value);
+        gpu_memory->Write<u32>(addr, value);
         return;
     }
-    fence_manager.SignalSemaphore(addr, value);
+    auto paddr = gpu_memory->GetPointer(addr);
+    fence_manager.SignalSemaphore(paddr, value);
 }
 
 void RasterizerVulkan::SignalSyncPoint(u32 value) {
@@ -553,12 +551,12 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA()
 
 void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
                                                 std::span<u8> memory) {
-    auto cpu_addr = gpu_memory.GpuToCpuAddress(address);
+    auto cpu_addr = gpu_memory->GpuToCpuAddress(address);
     if (!cpu_addr) [[unlikely]] {
-        gpu_memory.WriteBlock(address, memory.data(), copy_size);
+        gpu_memory->WriteBlock(address, memory.data(), copy_size);
         return;
     }
-    gpu_memory.WriteBlockUnsafe(address, memory.data(), copy_size);
+    gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size);
     {
         std::unique_lock<std::mutex> lock{buffer_cache.mutex};
         if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
@@ -627,7 +625,7 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64
 }
 
 void RasterizerVulkan::UpdateDynamicStates() {
-    auto& regs = maxwell3d.regs;
+    auto& regs = maxwell3d->regs;
     UpdateViewportsState(regs);
     UpdateScissorsState(regs);
     UpdateDepthBias(regs);
@@ -651,7 +649,7 @@ void RasterizerVulkan::UpdateDynamicStates() {
 }
 
 void RasterizerVulkan::BeginTransformFeedback() {
-    const auto& regs = maxwell3d.regs;
+    const auto& regs = maxwell3d->regs;
     if (regs.tfb_enabled == 0) {
         return;
     }
@@ -667,7 +665,7 @@ void RasterizerVulkan::BeginTransformFeedback() {
 }
 
 void RasterizerVulkan::EndTransformFeedback() {
-    const auto& regs = maxwell3d.regs;
+    const auto& regs = maxwell3d->regs;
     if (regs.tfb_enabled == 0) {
         return;
     }
@@ -917,7 +915,7 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
 }
 
 void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) {
-    auto& dirty{maxwell3d.dirty.flags};
+    auto& dirty{maxwell3d->dirty.flags};
     if (!dirty[Dirty::VertexInput]) {
         return;
     }
@@ -974,4 +972,50 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs)
     });
 }
 
+/// Creates `channel` on the rasterizer itself and on the texture, buffer, pipeline and query
+/// caches, then has the state tracker set up the dirty tables of the channel's Maxwell3D.
+void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) {
+    CreateChannel(channel);
+    {
+        // Hold both cache mutexes while the texture and buffer caches are updated.
+        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+        texture_cache.CreateChannel(channel);
+        buffer_cache.CreateChannel(channel);
+    }
+    pipeline_cache.CreateChannel(channel);
+    query_cache.CreateChannel(channel);
+    state_tracker.SetupTables(channel);
+}
+
+/// Binds the rasterizer and every cache to `channel` (looked up through its bind_id),
+/// retargets the state tracker and finally calls scheduler.InvalidateState().
+void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) {
+    const s32 channel_id = channel.bind_id;
+    BindToChannel(channel_id);
+    {
+        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+        texture_cache.BindToChannel(channel_id);
+        buffer_cache.BindToChannel(channel_id);
+    }
+    pipeline_cache.BindToChannel(channel_id);
+    query_cache.BindToChannel(channel_id);
+    // Point the state tracker at this channel's dirty flags.
+    state_tracker.ChangeChannel(channel);
+    scheduler.InvalidateState();
+}
+
+/// Erases the channel with the given id from the rasterizer and from every cache.
+/// NOTE(review): the state tracker is not told about the release; confirm that the released
+/// channel is never the one whose dirty flags it is still pointing at.
+void RasterizerVulkan::ReleaseChannel(s32 channel_id) {
+    EraseChannel(channel_id);
+    {
+        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+        texture_cache.EraseChannel(channel_id);
+        buffer_cache.EraseChannel(channel_id);
+    }
+    pipeline_cache.EraseChannel(channel_id);
+    query_cache.EraseChannel(channel_id);
+}
+
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 0370ea39b..642fe6576 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -8,6 +8,7 @@
 #include <boost/container/static_vector.hpp>
 
 #include "common/common_types.h"
+#include "video_core/control/channel_state_cache.h"
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/rasterizer_accelerated.h"
 #include "video_core/rasterizer_interface.h"
@@ -54,13 +55,13 @@ private:
     BufferCache& buffer_cache;
 };
 
-class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
+class RasterizerVulkan final : public VideoCore::RasterizerAccelerated,
+                               protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
 public:
     explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
-                              Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                              ScreenInfo& screen_info_, const Device& device_,
-                              MemoryAllocator& memory_allocator_, StateTracker& state_tracker_,
-                              Scheduler& scheduler_);
+                              Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_,
+                              const Device& device_, MemoryAllocator& memory_allocator_,
+                              StateTracker& state_tracker_, Scheduler& scheduler_);
     ~RasterizerVulkan() override;
 
     void Draw(bool is_indexed, bool is_instanced) override;
@@ -99,6 +100,12 @@ public:
     void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
                            const VideoCore::DiskResourceLoadCallback& callback) override;
 
+    void InitializeChannel(Tegra::Control::ChannelState& channel) override;
+
+    void BindChannel(Tegra::Control::ChannelState& channel) override;
+
+    void ReleaseChannel(s32 channel_id) override;
+
 private:
     static constexpr size_t MAX_TEXTURES = 192;
     static constexpr size_t MAX_IMAGES = 48;
@@ -134,9 +141,6 @@ private:
     void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);
 
     Tegra::GPU& gpu;
-    Tegra::MemoryManager& gpu_memory;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;
 
     ScreenInfo& screen_info;
     const Device& device;
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 9ad096431..a87bf8dd3 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -7,6 +7,7 @@
 
 #include "common/common_types.h"
 #include "core/core.h"
+#include "video_core/control/channel_state.h"
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/gpu.h"
@@ -174,9 +175,8 @@ void SetupDirtyVertexBindings(Tables& tables) {
 }
 } // Anonymous namespace
 
-StateTracker::StateTracker(Tegra::GPU& gpu)
-    : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
-    auto& tables{gpu.Maxwell3D().dirty.tables};
+void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) {
+    auto& tables{channel_state.maxwell_3d->dirty.tables};
     SetupDirtyFlags(tables);
     SetupDirtyViewports(tables);
     SetupDirtyScissors(tables);
@@ -199,4 +199,15 @@ StateTracker::StateTracker(Tegra::GPU& gpu)
     SetupDirtyVertexBindings(tables);
 }
 
+/// Redirects the tracker to the dirty flags of the given channel's Maxwell3D engine.
+void StateTracker::ChangeChannel(Tegra::Control::ChannelState& channel_state) {
+    flags = &channel_state.maxwell_3d->dirty.flags;
+}
+
+/// Initialises the invalidation mask only. `flags` stays null until ChangeChannel() binds a
+/// channel, so the members that dereference it must not run before that. The GPU reference
+/// is not used.
+StateTracker::StateTracker([[maybe_unused]] Tegra::GPU& gpu)
+    : flags{nullptr}, invalidation_flags{MakeInvalidationFlags()} {}
+
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index a85bc1c10..9f8a887f9 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -10,6 +10,12 @@
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/maxwell_3d.h"
 
+namespace Tegra {
+namespace Control {
+struct ChannelState;
+}
+} // namespace Tegra
+
 namespace Vulkan {
 
 namespace Dirty {
@@ -56,16 +62,16 @@ public:
     explicit StateTracker(Tegra::GPU& gpu);
 
     void InvalidateCommandBufferState() {
-        flags |= invalidation_flags;
+        (*flags) |= invalidation_flags;
         current_topology = INVALID_TOPOLOGY;
     }
 
     void InvalidateViewports() {
-        flags[Dirty::Viewports] = true;
+        (*flags)[Dirty::Viewports] = true;
     }
 
     void InvalidateScissors() {
-        flags[Dirty::Scissors] = true;
+        (*flags)[Dirty::Scissors] = true;
     }
 
     bool TouchViewports() {
@@ -139,16 +145,20 @@ public:
         return has_changed;
     }
 
+    void SetupTables(Tegra::Control::ChannelState& channel_state);
+
+    void ChangeChannel(Tegra::Control::ChannelState& channel_state);
+
 private:
     static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u);
 
     bool Exchange(std::size_t id, bool new_value) const noexcept {
-        const bool is_dirty = flags[id];
-        flags[id] = new_value;
+        const bool is_dirty = (*flags)[id];
+        (*flags)[id] = new_value;
         return is_dirty;
     }
 
-    Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
+    Tegra::Engines::Maxwell3D::DirtyState::Flags* flags;
     Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
     Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
 };
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index 164e4ee0e..f53066579 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -8,6 +8,7 @@
 #include "common/assert.h"
 #include "shader_recompiler/frontend/maxwell/control_flow.h"
 #include "shader_recompiler/object_pool.h"
+#include "video_core/control/channel_state.h"
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
@@ -33,29 +34,25 @@ void ShaderCache::SyncGuestHost() {
     RemovePendingShaders();
 }
 
-ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
-                         Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
-                         Tegra::Engines::KeplerCompute& kepler_compute_)
-    : gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
-      rasterizer{rasterizer_} {}
+ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
 
 bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
-    auto& dirty{maxwell3d.dirty.flags};
+    auto& dirty{maxwell3d->dirty.flags};
     if (!dirty[VideoCommon::Dirty::Shaders]) {
         return last_shaders_valid;
     }
     dirty[VideoCommon::Dirty::Shaders] = false;
 
-    const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
+    const GPUVAddr base_addr{maxwell3d->regs.code_address.CodeAddress()};
     for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) {
-        if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
+        if (!maxwell3d->regs.IsShaderConfigEnabled(index)) {
             unique_hashes[index] = 0;
             continue;
         }
-        const auto& shader_config{maxwell3d.regs.shader_config[index]};
+        const auto& shader_config{maxwell3d->regs.shader_config[index]};
         const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
         const GPUVAddr shader_addr{base_addr + shader_config.offset};
-        const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
+        const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)};
         if (!cpu_shader_addr) {
             LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
             last_shaders_valid = false;
@@ -64,7 +61,7 @@ bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
         const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
         if (!shader_info) {
             const u32 start_address{shader_config.offset};
-            GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
+            GraphicsEnvironment env{*maxwell3d, *gpu_memory, program, base_addr, start_address};
             shader_info = MakeShaderInfo(env, *cpu_shader_addr);
         }
         shader_infos[index] = shader_info;
@@ -75,10 +72,10 @@ bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
 }
 
 const ShaderInfo* ShaderCache::ComputeShader() {
-    const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
-    const auto& qmd{kepler_compute.launch_description};
+    const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
+    const auto& qmd{kepler_compute->launch_description};
     const GPUVAddr shader_addr{program_base + qmd.program_start};
-    const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
+    const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)};
     if (!cpu_shader_addr) {
         LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
         return nullptr;
@@ -86,22 +83,22 @@ const ShaderInfo* ShaderCache::ComputeShader() {
     if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) {
         return shader;
     }
-    ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+    ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
     return MakeShaderInfo(env, *cpu_shader_addr);
 }
 
 void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result,
                                           const std::array<u64, NUM_PROGRAMS>& unique_hashes) {
     size_t env_index{};
-    const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
+    const GPUVAddr base_addr{maxwell3d->regs.code_address.CodeAddress()};
     for (size_t index = 0; index < NUM_PROGRAMS; ++index) {
         if (unique_hashes[index] == 0) {
             continue;
         }
         const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
         auto& env{result.envs[index]};
-        const u32 start_address{maxwell3d.regs.shader_config[index].offset};
-        env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
+        const u32 start_address{maxwell3d->regs.shader_config[index].offset};
+        env = GraphicsEnvironment{*maxwell3d, *gpu_memory, program, base_addr, start_address};
         env.SetCachedSize(shader_infos[index]->size_bytes);
         result.env_ptrs[env_index++] = &env;
     }
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index f67cea8c4..a4391202d 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -12,6 +12,7 @@
 #include <vector>
 
 #include "common/common_types.h"
+#include "video_core/control/channel_state_cache.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/shader_environment.h"
 
@@ -19,6 +20,10 @@ namespace Tegra {
 class MemoryManager;
 }
 
+namespace Tegra::Control {
+struct ChannelState;
+}
+
 namespace VideoCommon {
 
 class GenericEnvironment;
@@ -28,7 +33,7 @@ struct ShaderInfo {
     size_t size_bytes{};
 };
 
-class ShaderCache {
+class ShaderCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
     static constexpr u64 YUZU_PAGEBITS = 14;
     static constexpr u64 YUZU_PAGESIZE = u64(1) << YUZU_PAGEBITS;
 
@@ -71,9 +76,7 @@ protected:
         }
     };
 
-    explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
-                         Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
-                         Tegra::Engines::KeplerCompute& kepler_compute_);
+    explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_);
 
     /// @brief Update the hashes and information of shader stages
     /// @param unique_hashes Shader hashes to store into when a stage is enabled
@@ -88,10 +91,6 @@ protected:
     void GetGraphicsEnvironments(GraphicsEnvironments& result,
                                  const std::array<u64, NUM_PROGRAMS>& unique_hashes);
 
-    Tegra::MemoryManager& gpu_memory;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;
-
     std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{};
     bool last_shaders_valid = false;
 
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 1f85ec9da..620565684 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -88,6 +88,9 @@ struct ImageBase {
     u32 scale_rating = 0;
     u64 scale_tick = 0;
     bool has_scaled = false;
+
+    size_t channel = 0;
+
     ImageFlagBits flags = ImageFlagBits::CpuModified;
 
     GPUVAddr gpu_addr = 0;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 1dbe01bc0..2731aead0 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -7,6 +7,7 @@
 
 #include "common/alignment.h"
 #include "common/settings.h"
+#include "video_core/control/channel_state.h"
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/texture_cache/image_view_base.h"
@@ -29,12 +30,8 @@ using VideoCore::Surface::SurfaceType;
 using namespace Common::Literals;
 
 template <class P>
-TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
-                              Tegra::Engines::Maxwell3D& maxwell3d_,
-                              Tegra::Engines::KeplerCompute& kepler_compute_,
-                              Tegra::MemoryManager& gpu_memory_)
-    : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
-      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
+TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_)
+    : runtime{runtime_}, rasterizer{rasterizer_} {
     // Configure null sampler
     TSCEntry sampler_descriptor{};
     sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
@@ -42,6 +39,13 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
     sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear);
     sampler_descriptor.cubemap_anisotropy.Assign(1);
 
+    // Setup channels
+    current_channel_id = UNSET_CHANNEL;
+    state = nullptr;
+    maxwell3d = nullptr;
+    kepler_compute = nullptr;
+    gpu_memory = nullptr;
+
     // Make sure the first index is reserved for the null resources
     // This way the null resource becomes a compile time constant
     void(slot_images.insert(NullImageParams{}));
@@ -93,7 +97,7 @@ void TextureCache<P>::RunGarbageCollector() {
             const auto copies = FullDownloadCopies(image.info);
             image.DownloadMemory(map, copies);
             runtime.Finish();
-            SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
+            SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
         }
         if (True(image.flags & ImageFlagBits::Tracked)) {
             UntrackImage(image, image_id);
@@ -152,22 +156,23 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
 template <class P>
 template <bool has_blacklists>
 void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) {
-    FillImageViews<has_blacklists>(graphics_image_table, graphics_image_view_ids, views);
+    FillImageViews<has_blacklists>(state->graphics_image_table, state->graphics_image_view_ids,
+                                   views);
 }
 
 template <class P>
 void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
-    FillImageViews<true>(compute_image_table, compute_image_view_ids, views);
+    FillImageViews<true>(state->compute_image_table, state->compute_image_view_ids, views);
 }
 
 template <class P>
 typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
-    if (index > graphics_sampler_table.Limit()) {
+    if (index > state->graphics_sampler_table.Limit()) {
         LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
         return &slot_samplers[NULL_SAMPLER_ID];
     }
-    const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
-    SamplerId& id = graphics_sampler_ids[index];
+    const auto [descriptor, is_new] = state->graphics_sampler_table.Read(index);
+    SamplerId& id = state->graphics_sampler_ids[index];
     if (is_new) {
         id = FindSampler(descriptor);
     }
@@ -176,12 +181,12 @@ typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
 
 template <class P>
 typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
-    if (index > compute_sampler_table.Limit()) {
+    if (index > state->compute_sampler_table.Limit()) {
         LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
         return &slot_samplers[NULL_SAMPLER_ID];
     }
-    const auto [descriptor, is_new] = compute_sampler_table.Read(index);
-    SamplerId& id = compute_sampler_ids[index];
+    const auto [descriptor, is_new] = state->compute_sampler_table.Read(index);
+    SamplerId& id = state->compute_sampler_ids[index];
     if (is_new) {
         id = FindSampler(descriptor);
     }
@@ -191,34 +196,34 @@ typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
 template <class P>
 void TextureCache<P>::SynchronizeGraphicsDescriptors() {
     using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex;
-    const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex;
-    const u32 tic_limit = maxwell3d.regs.tic.limit;
-    const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit;
-    if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) {
-        graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+    const bool linked_tsc = maxwell3d->regs.sampler_index == SamplerIndex::ViaHeaderIndex;
+    const u32 tic_limit = maxwell3d->regs.tic.limit; // registers of the bound channel's engine
+    const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tsc.limit;
+    if (state->graphics_sampler_table.Synchornize(maxwell3d->regs.tsc.Address(), tsc_limit)) {
+        state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); // ids span 0..tsc_limit
     }
-    if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) {
-        graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+    if (state->graphics_image_table.Synchornize(maxwell3d->regs.tic.Address(), tic_limit)) {
+        state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
     }
 }
 
 template <class P>
 void TextureCache<P>::SynchronizeComputeDescriptors() {
-    const bool linked_tsc = kepler_compute.launch_description.linked_tsc;
-    const u32 tic_limit = kepler_compute.regs.tic.limit;
-    const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit;
-    const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address();
-    if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
-        compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+    const bool linked_tsc = kepler_compute->launch_description.linked_tsc;
+    const u32 tic_limit = kepler_compute->regs.tic.limit; // bound channel's compute engine
+    const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
+    const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address();
+    if (state->compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
+        state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); // ids span 0..tsc_limit
     }
-    if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) {
-        compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+    if (state->compute_image_table.Synchornize(kepler_compute->regs.tic.Address(), tic_limit)) {
+        state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
     }
 }
 
 template <class P>
 bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
-    auto& flags = maxwell3d.dirty.flags;
+    auto& flags = maxwell3d->dirty.flags;
     u32 scale_rating = 0;
     bool rescaled = false;
     std::array<ImageId, NUM_RT> tmp_color_images{};
@@ -315,7 +320,7 @@ bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
 template <class P>
 void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
     using namespace VideoCommon::Dirty;
-    auto& flags = maxwell3d.dirty.flags;
+    auto& flags = maxwell3d->dirty.flags;
     if (!flags[Dirty::RenderTargets]) {
         for (size_t index = 0; index < NUM_RT; ++index) {
             ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
@@ -342,7 +347,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
     PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
 
     for (size_t index = 0; index < NUM_RT; ++index) {
-        render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
+        render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d->regs.rt_control.Map(index));
     }
     u32 up_scale = 1;
     u32 down_shift = 0;
@@ -351,8 +356,8 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
         down_shift = Settings::values.resolution_info.down_shift;
     }
     render_targets.size = Extent2D{
-        (maxwell3d.regs.render_area.width * up_scale) >> down_shift,
-        (maxwell3d.regs.render_area.height * up_scale) >> down_shift,
+        (maxwell3d->regs.render_area.width * up_scale) >> down_shift,
+        (maxwell3d->regs.render_area.height * up_scale) >> down_shift,
     };
 
     flags[Dirty::DepthBiasGlobal] = true;
@@ -458,7 +463,7 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
         const auto copies = FullDownloadCopies(image.info);
         image.DownloadMemory(map, copies);
         runtime.Finish();
-        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
+        SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
     }
 }
 
@@ -655,7 +660,7 @@ void TextureCache<P>::PopAsyncFlushes() {
     for (const ImageId image_id : download_ids) {
         const ImageBase& image = slot_images[image_id];
         const auto copies = FullDownloadCopies(image.info);
-        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span);
+        SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span);
         download_map.offset += image.unswizzled_size_bytes;
         download_span = download_span.subspan(image.unswizzled_size_bytes);
     }
@@ -714,26 +719,26 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
     const GPUVAddr gpu_addr = image.gpu_addr;
 
     if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
-        gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
+        gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
         const auto uploads = FullUploadSwizzles(image.info);
         runtime.AccelerateImageUpload(image, staging, uploads);
     } else if (True(image.flags & ImageFlagBits::Converted)) {
         std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
-        auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
+        auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, unswizzled_data);
         ConvertImage(unswizzled_data, image.info, mapped_span, copies);
         image.UploadMemory(staging, copies);
     } else {
-        const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
+        const auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, mapped_span);
         image.UploadMemory(staging, copies);
     }
 }
 
 template <class P>
 ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
-    if (!IsValidEntry(gpu_memory, config)) {
+    if (!IsValidEntry(*gpu_memory, config)) {
         return NULL_IMAGE_VIEW_ID;
     }
-    const auto [pair, is_new] = image_views.try_emplace(config);
+    const auto [pair, is_new] = state->image_views.try_emplace(config);
     ImageViewId& image_view_id = pair->second;
     if (is_new) {
         image_view_id = CreateImageView(config);
@@ -777,9 +782,9 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
 template <class P>
 ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                    RelaxedOptions options) {
-    std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     if (!cpu_addr) {
-        cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
+        cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
         if (!cpu_addr) {
             return ImageId{};
         }
@@ -860,7 +865,7 @@ void TextureCache<P>::InvalidateScale(Image& image) {
         image.scale_tick = frame_tick + 1;
     }
     const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
-    auto& dirty = maxwell3d.dirty.flags;
+    auto& dirty = maxwell3d->dirty.flags;
     dirty[Dirty::RenderTargets] = true;
     dirty[Dirty::ZetaBuffer] = true;
     for (size_t rt = 0; rt < NUM_RT; ++rt) {
@@ -881,11 +886,11 @@ void TextureCache<P>::InvalidateScale(Image& image) {
     image.image_view_ids.clear();
     image.image_view_infos.clear();
     if constexpr (ENABLE_VALIDATION) {
-        std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
-        std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
+        std::ranges::fill(state->graphics_image_view_ids, CORRUPT_ID);
+        std::ranges::fill(state->compute_image_view_ids, CORRUPT_ID);
     }
-    graphics_image_table.Invalidate();
-    compute_image_table.Invalidate();
+    state->graphics_image_table.Invalidate();
+    state->compute_image_table.Invalidate();
     has_deleted_images = true;
 }
 
@@ -929,10 +934,10 @@ bool TextureCache<P>::ScaleDown(Image& image) {
 template <class P>
 ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                      RelaxedOptions options) {
-    std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     if (!cpu_addr) {
         const auto size = CalculateGuestSizeInBytes(info);
-        cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
+        cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size);
         if (!cpu_addr) {
             const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
             virtual_invalid_space += Common::AlignUp(size, 32);
@@ -1050,7 +1055,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
     const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
     Image& new_image = slot_images[new_image_id];
 
-    if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
+    if (!gpu_memory->IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
         new_image.flags |= ImageFlagBits::Sparse;
     }
 
@@ -1192,7 +1197,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
     if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
         return NULL_SAMPLER_ID;
     }
-    const auto [pair, is_new] = samplers.try_emplace(config);
+    const auto [pair, is_new] = state->samplers.try_emplace(config);
     if (is_new) {
         pair->second = slot_samplers.insert(runtime, config);
     }
@@ -1201,7 +1206,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
 
 template <class P>
 ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
-    const auto& regs = maxwell3d.regs;
+    const auto& regs = maxwell3d->regs;
     if (index >= regs.rt_control.count) {
         return ImageViewId{};
     }
@@ -1219,7 +1224,7 @@ ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
 
 template <class P>
 ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
-    const auto& regs = maxwell3d.regs;
+    const auto& regs = maxwell3d->regs;
     if (!regs.zeta_enable) {
         return ImageViewId{};
     }
@@ -1321,8 +1326,8 @@ void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Fu
     static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
     boost::container::small_vector<ImageId, 8> images;
     ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
-        const auto it = gpu_page_table.find(page);
-        if (it == gpu_page_table.end()) {
+        const auto it = state->gpu_page_table.find(page);
+        if (it == state->gpu_page_table.end()) {
             if constexpr (BOOL_BREAK) {
                 return false;
             } else {
@@ -1403,9 +1408,9 @@ template <typename Func>
 void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
     using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
     static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
-    const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
+    const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
     for (const auto& [gpu_addr, size] : segments) {
-        std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+        std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
         ASSERT(cpu_addr);
         if constexpr (RETURNS_BOOL) {
             if (func(gpu_addr, *cpu_addr, size)) {
@@ -1449,7 +1454,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
     image.lru_index = lru_cache.Insert(image_id, frame_tick);
 
     ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
-                   [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
+                   [this, image_id](u64 page) { state->gpu_page_table[page].push_back(image_id); });
     if (False(image.flags & ImageFlagBits::Sparse)) {
         auto map_id =
             slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
@@ -1497,8 +1502,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
             }
             image_ids.erase(vector_it);
         };
-    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
-                   [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
+    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
+        clear_page_table(page, state->gpu_page_table);
+    });
     if (False(image.flags & ImageFlagBits::Sparse)) {
         const auto map_id = image.map_view_id;
         ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
@@ -1631,7 +1637,7 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
     ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
 
     // Mark render targets as dirty
-    auto& dirty = maxwell3d.dirty.flags;
+    auto& dirty = maxwell3d->dirty.flags;
     dirty[Dirty::RenderTargets] = true;
     dirty[Dirty::ZetaBuffer] = true;
     for (size_t rt = 0; rt < NUM_RT; ++rt) {
@@ -1681,22 +1687,24 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
     if (alloc_images.empty()) {
         image_allocs_table.erase(alloc_it);
     }
-    if constexpr (ENABLE_VALIDATION) {
-        std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
-        std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
+    for (auto& this_state : channel_storage) {
+        if constexpr (ENABLE_VALIDATION) {
+            std::ranges::fill(this_state.graphics_image_view_ids, CORRUPT_ID);
+            std::ranges::fill(this_state.compute_image_view_ids, CORRUPT_ID);
+        }
+        this_state.graphics_image_table.Invalidate();
+        this_state.compute_image_table.Invalidate();
     }
-    graphics_image_table.Invalidate();
-    compute_image_table.Invalidate();
     has_deleted_images = true;
 }
 
 template <class P>
 void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
-    auto it = image_views.begin();
-    while (it != image_views.end()) {
+    auto it = state->image_views.begin();
+    while (it != state->image_views.end()) {
         const auto found = std::ranges::find(removed_views, it->second);
         if (found != removed_views.end()) {
-            it = image_views.erase(it);
+            it = state->image_views.erase(it);
         } else {
             ++it;
         }
@@ -1943,7 +1951,7 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
     const ImageViewBase& image_view = slot_image_views[id];
     const ImageBase& image = slot_images[image_view.image_id];
     const Extent3D size = image_view.size;
-    const auto& regs = maxwell3d.regs;
+    const auto& regs = maxwell3d->regs;
     const auto& scissor = regs.scissor_test[0];
     if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
         // Images with multiple resources can't be cleared in a single call
@@ -1958,4 +1966,61 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
            scissor.max_y >= size.height;
 }
 
+template <class P>
+TextureCache<P>::ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& state) noexcept
+    : maxwell3d{*state.maxwell_3d}, kepler_compute{*state.kepler_compute}, // unchecked derefs
+      gpu_memory{*state.memory_manager}, graphics_image_table{gpu_memory},
+      graphics_sampler_table{gpu_memory}, compute_image_table{gpu_memory}, compute_sampler_table{
+                                                                               gpu_memory} {}
+
+/// Create channel state, recycling the storage slot of an erased channel when one is free.
+template <class P>
+void TextureCache<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
+    ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0);
+    size_t new_id = channel_storage.size(); // index a newly appended slot will get
+    if (free_channel_ids.empty()) {
+        channel_storage.emplace_back(channel);
+    } else {
+        new_id = free_channel_ids.front();
+        free_channel_ids.pop_front();
+        channel_storage[new_id].~ChannelInfo(); // old object is still alive: destroy it or it leaks
+        new (&channel_storage[new_id]) ChannelInfo(channel);
+    }
+    channel_map.emplace(channel.bind_id, new_id);
+    if (current_channel_id != UNSET_CHANNEL) {
+        state = &channel_storage[current_channel_id];
+    }
+}
+
+/// Bind a channel for execution; the id must have been registered through CreateChannel.
+template <class P>
+void TextureCache<P>::BindToChannel(s32 id) {
+    auto it = channel_map.find(id);
+    ASSERT(it != channel_map.end() && id >= 0);
+    current_channel_id = it->second;
+    state = &channel_storage[current_channel_id];
+    maxwell3d = &state->maxwell3d; // shortcuts into the bound channel's state
+    kepler_compute = &state->kepler_compute;
+    gpu_memory = &state->gpu_memory;
+}
+
+/// Erase channel's state: frees its bind id and queues its storage slot for reuse.
+template <class P>
+void TextureCache<P>::EraseChannel(s32 id) {
+    const auto it = channel_map.find(id);
+    ASSERT(it != channel_map.end() && id >= 0);
+    const auto this_id = it->second;
+    free_channel_ids.push_back(this_id); // the ChannelInfo in that slot is not destroyed here
+    channel_map.erase(it);
+    if (this_id == current_channel_id) {
+        current_channel_id = UNSET_CHANNEL; // erased the bound channel: nothing is bound now
+        state = nullptr;
+        maxwell3d = nullptr;
+        kepler_compute = nullptr;
+        gpu_memory = nullptr;
+    } else if (current_channel_id != UNSET_CHANNEL) {
+        state = &channel_storage[current_channel_id];
+    }
+}
+
 } // namespace VideoCommon
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 7e6c6cef2..69efcb718 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -3,6 +3,8 @@
 
 #pragma once
 
+#include <deque>
+#include <limits>
 #include <mutex>
 #include <span>
 #include <type_traits>
@@ -26,6 +28,10 @@
 #include "video_core/texture_cache/types.h"
 #include "video_core/textures/texture.h"
 
+namespace Tegra::Control {
+struct ChannelState;
+} // namespace Tegra::Control
+
 namespace VideoCommon {
 
 using Tegra::Texture::SwizzleSource;
@@ -58,6 +64,8 @@ class TextureCache {
     /// True when the API can provide info about the memory of the device.
     static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
 
+    static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()}; // no channel bound
+
     static constexpr s64 TARGET_THRESHOLD = 4_GiB;
     static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB;
     static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB;
@@ -85,8 +93,7 @@ class TextureCache {
     };
 
 public:
-    explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
-                          Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
+    explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&);
 
     /// Notify the cache that a new frame has been queued
     void TickFrame();
@@ -171,6 +178,15 @@ public:
 
     [[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept;
 
+    /// Create channel state; channel.bind_id must be non-negative and not yet registered.
+    void CreateChannel(struct Tegra::Control::ChannelState& channel);
+
+    /// Bind a channel for execution; id must belong to a channel created and not yet erased.
+    void BindToChannel(s32 id);
+
+    /// Erase channel's state; if it is the bound channel, no channel is bound afterwards.
+    void EraseChannel(s32 id);
+
     std::mutex mutex;
 
 private:
@@ -338,31 +354,52 @@ private:
     u64 GetScaledImageSizeBytes(ImageBase& image);
 
     Runtime& runtime;
-    VideoCore::RasterizerInterface& rasterizer;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;
-    Tegra::MemoryManager& gpu_memory;
 
-    DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
-    DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
-    std::vector<SamplerId> graphics_sampler_ids;
-    std::vector<ImageViewId> graphics_image_view_ids;
+    struct ChannelInfo { // cache state kept separately for each channel
+        ChannelInfo() = delete;
+        explicit ChannelInfo(struct Tegra::Control::ChannelState& state) noexcept;
+        ChannelInfo(const ChannelInfo& state) = delete;
+        ChannelInfo& operator=(const ChannelInfo&) = delete;
+        ChannelInfo(ChannelInfo&& other) noexcept = default;
+        ChannelInfo& operator=(ChannelInfo&&) = delete; // reference members can't be rebound
+
+        Tegra::Engines::Maxwell3D& maxwell3d;
+        Tegra::Engines::KeplerCompute& kepler_compute;
+        Tegra::MemoryManager& gpu_memory;
+
+        DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
+        DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
+        std::vector<SamplerId> graphics_sampler_ids;
+        std::vector<ImageViewId> graphics_image_view_ids;
 
-    DescriptorTable<TICEntry> compute_image_table{gpu_memory};
-    DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
-    std::vector<SamplerId> compute_sampler_ids;
-    std::vector<ImageViewId> compute_image_view_ids;
+        DescriptorTable<TICEntry> compute_image_table{gpu_memory};
+        DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
+        std::vector<SamplerId> compute_sampler_ids;
+        std::vector<ImageViewId> compute_image_view_ids;
+
+        std::unordered_map<TICEntry, ImageViewId> image_views;
+        std::unordered_map<TSCEntry, SamplerId> samplers;
+
+        std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
+    };
+
+    std::deque<ChannelInfo> channel_storage;     // slots are recycled via free_channel_ids
+    std::deque<size_t> free_channel_ids;         // channel_storage indices free for reuse
+    std::unordered_map<s32, size_t> channel_map; // bind_id -> channel_storage index
+
+    ChannelInfo* state{}; // state and the engine pointers below stay null until BindToChannel
+    size_t current_channel_id{UNSET_CHANNEL};
+    VideoCore::RasterizerInterface& rasterizer;
+    Tegra::Engines::Maxwell3D* maxwell3d{};
+    Tegra::Engines::KeplerCompute* kepler_compute{};
+    Tegra::MemoryManager* gpu_memory{};
 
     RenderTargets render_targets;
 
-    std::unordered_map<TICEntry, ImageViewId> image_views;
-    std::unordered_map<TSCEntry, SamplerId> samplers;
     std::unordered_map<RenderTargets, FramebufferId> framebuffers;
 
     std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
-    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
     std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
-
     std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
 
     VAddr virtual_invalid_space{};
-- 
cgit v1.2.3